/*
 *     Copyright 2019 tom5079
 *
 *     Licensed under the Apache License, Version 2.0 (the "License");
 *     you may not use this file except in compliance with the License.
 *     You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 *     Unless required by applicable law or agreed to in writing, software
 *     distributed under the License is distributed on an "AS IS" BASIS,
 *     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *     See the License for the specific language governing permissions and
 *     limitations under the License.
 */

package xyz.quaver.pupil.hitomi

import okhttp3.Request
import xyz.quaver.pupil.client
import java.net.URL
import java.nio.ByteBuffer
import java.nio.ByteOrder
import java.security.MessageDigest
import kotlin.math.min

//searchlib.js
const val separator = "-"
const val extension = ".html"
const val index_dir = "tagindex"
const val galleries_index_dir = "galleriesindex"
const val max_node_size = 464
const val B = 16
const val compressed_nozomi_prefix = "n"

/** Version string of the tag index, fetched on first access via [getIndexVersion]. */
val tag_index_version: String by lazy { getIndexVersion("tagindex") }

/** Version string of the galleries index, fetched on first access via [getIndexVersion]. */
val galleries_index_version: String by lazy { getIndexVersion("galleriesindex") }

// Compiled once instead of on every sanitize() call.
private val sanitizePattern = Regex("[/#]")

/** Returns the SHA-256 digest of [data]. */
fun sha256(data: ByteArray) : ByteArray =
    MessageDigest.getInstance("SHA-256").digest(data)

/**
 * Builds the index lookup key for [term]: the first four bytes of the SHA-256
 * digest of the term's bytes.
 */
@OptIn(ExperimentalUnsignedTypes::class)
fun hashTerm(term: String) : UByteArray =
    sha256(term.toByteArray()).toUByteArray().sliceArray(0 until 4)

/** Removes every `/` and `#` character from [input]. */
fun sanitize(input: String) : String =
    input.replace(sanitizePattern, "")

/**
 * Downloads the `version` resource of the index called [name] and returns its text.
 *
 * The current time is appended as the `_` query parameter (presumably to defeat
 * caching; confirm against the server's behaviour).
 */
fun getIndexVersion(name: String) : String =
    URL("$protocol//$domain/$name/version?_=${System.currentTimeMillis()}").readText()

//search.js
/**
 * Resolves [query] to a set of gallery IDs.
 *
 * Underscores in the query are treated as spaces. A query containing `:` is
 * split into a namespace and a tag and answered from a nozomi file:
 *  - `female:` / `male:` look up the whole query under the `tag` area;
 *  - `language:x` looks up the `index` file of language `x`;
 *  - any other namespace is used as the area itself.
 *
 * A query without `:` is hashed with [hashTerm] and searched in the
 * `galleries` index.
 *
 * @return the matching gallery IDs, or an empty set when the root node cannot
 * be loaded or the key is not found.
 */
fun getGalleryIDsForQuery(query: String) : Set<Int> {
    val normalized = query.replace("_", " ")

    if (normalized.indexOf(':') > -1) {
        val sides = normalized.split(":")
        val ns = sides[0]
        var tag = sides[1]

        var area : String? = ns
        var language = "all"
        when (ns) {
            "female", "male" -> {
                area = "tag"
                // These namespaces keep the "ns:tag" form as the file name.
                tag = normalized
            }
            "language" -> {
                area = null
                language = tag
                tag = "index"
            }
        }

        return getGalleryIDsFromNozomi(area, tag, language)
    }

    val key = hashTerm(normalized)
    val field = "galleries"

    val node = getNodeAtAddress(field, 0) ?: return emptySet()
    val data = bSearch(field, key, node) ?: return emptySet()

    return getGalleryIDsFromData(data)
}

/**
 * Returns tag suggestions for [query].
 *
 * Underscores are treated as spaces. When the query contains `:` the part
 * before it selects the index field and the part after it is the search term;
 * otherwise the `global` field is searched with the whole query.
 *
 * @return the suggestions stored for the term, or an empty list when the root
 * node cannot be loaded or the key is not found.
 */
fun getSuggestionsForQuery(query: String) : List<Suggestion> {
    val normalized = query.replace('_', ' ')

    var field = "global"
    var term = normalized

    if (term.indexOf(':') > -1) {
        val sides = normalized.split(':')
        field = sides[0]
        term = sides[1]
    }

    val key = hashTerm(term)

    val node = getNodeAtAddress(field, 0) ?: return emptyList()
    val data = bSearch(field, key, node) ?: return emptyList()

    return getSuggestionsFromData(field, data)
}

/**
 * A single suggestion entry.
 *
 * @property s the tag text
 * @property t the count stored with the tag
 * @property u the relative URL built for the tag
 * @property n the namespace of the tag
 */
data class Suggestion(val s: String, val t: Int, val u: String, val n: String)

/**
 * Downloads and decodes the suggestion record located at [data] in the data
 * file of [field].
 *
 * The record is read as big-endian: an entry count, then for each entry a
 * length-prefixed UTF-8 namespace, a length-prefixed UTF-8 tag and an `Int` count.
 *
 * @param data `(offset, length)` of the record inside the data file
 * @throws Exception when the length is outside `1..10000` or the entry count
 * is outside `1..100`
 */
fun getSuggestionsFromData(field: String, data: Pair<Long, Int>) : List<Suggestion> {
    val url = "$protocol//$domain/$index_dir/$field.$tag_index_version.data"
    val (offset, length) = data

    if (length > 10000 || length <= 0)
        throw Exception("length $length is too long")

    val inbuf = getURLAtRange(url, offset.until(offset+length))

    val suggestions = ArrayList<Suggestion>()

    val buffer = ByteBuffer
        .wrap(inbuf)
        .order(ByteOrder.BIG_ENDIAN)

    val numberOfSuggestions = buffer.int

    if (numberOfSuggestions > 100 || numberOfSuggestions <= 0)
        throw Exception("number of suggestions $numberOfSuggestions is too long")

    // Reads one length-prefixed UTF-8 string and advances the buffer past it.
    fun readString() : String {
        val size = buffer.int
        val start = buffer.position()
        val text = inbuf.sliceArray(start.until(start+size)).toString(Charsets.UTF_8)
        buffer.position(start+size)
        return text
    }

    repeat(numberOfSuggestions) {
        val ns = readString()
        val tag = readString()
        val count = buffer.int

        val tagname = sanitize(tag)
        val u = when (ns) {
            "female", "male" -> "/tag/$ns:$tagname${separator}1$extension"
            "language" -> "/index-$tagname${separator}1$extension"
            else -> "/$ns/$tagname${separator}all${separator}1$extension"
        }

        suggestions.add(Suggestion(tag, count, u, ns))
    }

    return suggestions
}

/**
 * Downloads a nozomi file and decodes it as a sequence of big-endian `Int`
 * gallery IDs.
 *
 * @param area sub-directory of the nozomi file, or `null` for a top-level file
 * @return the decoded IDs, or an empty set when the download fails
 */
fun getGalleryIDsFromNozomi(area: String?, tag: String, language: String) : Set<Int> {
    val nozomiAddress = when (area) {
        null -> "$protocol//$domain/$compressed_nozomi_prefix/$tag-$language$nozomiextension"
        else -> "$protocol//$domain/$compressed_nozomi_prefix/$area/$tag-$language$nozomiextension"
    }

    // Best effort: any download failure is reported as "no results".
    val bytes = try {
        URL(nozomiAddress).readBytes()
    } catch (e: Exception) {
        return emptySet()
    }

    val nozomi = mutableSetOf<Int>()

    val arrayBuffer = ByteBuffer
        .wrap(bytes)
        .order(ByteOrder.BIG_ENDIAN)

    // Only whole 4-byte integers are read, so a payload whose size is not a
    // multiple of four no longer ends in a BufferUnderflowException.
    while (arrayBuffer.remaining() >= Int.SIZE_BYTES)
        nozomi.add(arrayBuffer.int)

    return nozomi
}

/**
 * Downloads and decodes the gallery ID record located at [data] in the
 * galleries data file.
 *
 * The record is read as big-endian: an ID count followed by that many `Int` IDs.
 *
 * @param data `(offset, length)` of the record inside the data file
 * @throws Exception when the length is outside `1..100000000`, the ID count is
 * outside `1..10000000`, or the downloaded size does not match the count
 */
fun getGalleryIDsFromData(data: Pair<Long, Int>) : Set<Int> {
    val url = "$protocol//$domain/$galleries_index_dir/galleries.$galleries_index_version.data"
    val (offset, length) = data

    if (length > 100000000 || length <= 0)
        throw Exception("length $length is too long")

    val inbuf = getURLAtRange(url, offset.until(offset+length))

    val galleryIDs = mutableSetOf<Int>()

    val buffer = ByteBuffer
        .wrap(inbuf)
        .order(ByteOrder.BIG_ENDIAN)

    val numberOfGalleryIDs = buffer.int

    // One Int for the count itself plus one Int per ID.
    val expectedLength = numberOfGalleryIDs*4+4

    if (numberOfGalleryIDs > 10000000 || numberOfGalleryIDs <= 0)
        throw Exception("number_of_galleryids $numberOfGalleryIDs is too long")
    else if (inbuf.size != expectedLength)
        throw Exception("inbuf.byteLength ${inbuf.size} != expected_length $expectedLength")

    repeat(numberOfGalleryIDs) {
        galleryIDs.add(buffer.int)
    }

    return galleryIDs
}

/**
 * Downloads and decodes the index node of [field] stored at byte [address].
 *
 * `galleries`, `languages` and `nozomiurl` are read from the galleries index;
 * every other field is read from the tag index. [max_node_size] bytes are
 * requested starting at [address].
 *
 * @return the decoded node, or `null` when the server returned no data
 */
fun getNodeAtAddress(field: String, address: Long) : Node? {
    val url = when (field) {
        "galleries" -> "$protocol//$domain/$galleries_index_dir/galleries.$galleries_index_version.index"
        "languages" -> "$protocol//$domain/$galleries_index_dir/languages.$galleries_index_version.index"
        "nozomiurl" -> "$protocol//$domain/$galleries_index_dir/nozomiurl.$galleries_index_version.index"
        else -> "$protocol//$domain/$index_dir/$field.$tag_index_version.index"
    }

    val nodedata = getURLAtRange(url, address.until(address+max_node_size))

    // An empty body cannot be decoded; report it as "no node" so callers'
    // null handling applies instead of a BufferUnderflowException.
    if (nodedata.isEmpty())
        return null

    return decodeNode(nodedata)
}

/**
 * Requests the bytes of [url] covered by [range] using an HTTP `Range` header
 * (`range.first` to `range.last`, both inclusive).
 *
 * @return the response body, or an empty array when the response has no body
 */
fun getURLAtRange(url: String, range: LongRange) : ByteArray {
    val request = Request.Builder()
        .url(url)
        .header("Range", "bytes=${range.first}-${range.last}")
        .build()

    return client.newCall(request).execute().body()?.use { it.bytes() } ?: byteArrayOf()
}

/**
 * A decoded index node.
 *
 * @property keys the keys stored in the node
 * @property datas `(offset, length)` pairs, looked up by key position in [bSearch]
 * @property subNodeAddresses addresses of child nodes; `0` marks an absent child
 */
@OptIn(ExperimentalUnsignedTypes::class)
data class Node(val keys: List<UByteArray>, val datas: List<Pair<Long, Int>>, val subNodeAddresses: List<Long>)

/**
 * Decodes a raw big-endian node.
 *
 * Layout as read here: key count, then length-prefixed keys; data count, then
 * `(Long offset, Int length)` pairs; then `B + 1` `Long` sub-node addresses.
 *
 * @throws Exception when a key size is `0` or greater than `32`
 */
@OptIn(ExperimentalUnsignedTypes::class)
fun decodeNode(data: ByteArray) : Node {
    val buffer = ByteBuffer
        .wrap(data)
        .order(ByteOrder.BIG_ENDIAN)

    val uData = data.toUByteArray()

    val numberOfKeys = buffer.int
    val keys = ArrayList<UByteArray>()

    repeat(numberOfKeys) {
        val keySize = buffer.int

        if (keySize == 0 || keySize > 32)
            throw Exception("fatal: !keySize || keySize > 32")

        val start = buffer.position()
        keys.add(uData.sliceArray(start.until(start+keySize)))
        buffer.position(start+keySize)
    }

    val numberOfDatas = buffer.int
    val datas = ArrayList<Pair<Long, Int>>()

    repeat(numberOfDatas) {
        val offset = buffer.long
        val length = buffer.int

        datas.add(Pair(offset, length))
    }

    val numberOfSubNodeAddresses = B+1
    val subNodeAddresses = ArrayList<Long>()

    repeat(numberOfSubNodeAddresses) {
        subNodeAddresses.add(buffer.long)
    }

    return Node(keys, datas, subNodeAddresses)
}

/**
 * Searches the index of [field] for [key], starting at [node] and descending
 * into child nodes fetched with [getNodeAtAddress].
 *
 * @return the `(offset, length)` pair stored for the key, or `null` when the
 * key is absent or a child node cannot be loaded
 */
@OptIn(ExperimentalUnsignedTypes::class)
fun bSearch(field: String, key: UByteArray, node: Node) : Pair<Long, Int>? {
    // Compares byte by byte over the shorter length only; arrays that agree on
    // that prefix compare as equal.
    fun compareArrayBuffers(dv1: UByteArray, dv2: UByteArray) : Int {
        val top = min(dv1.size, dv2.size)

        for (i in 0.until(top)) {
            if (dv1[i] < dv2[i])
                return -1
            else if (dv1[i] > dv2[i])
                return 1
        }

        return 0
    }

    // Returns (found, index): the index of the first node key that is not
    // smaller than the searched key, or keys.size when every key is smaller.
    fun locateKey(key: UByteArray, node: Node) : Pair<Boolean, Int> {
        for (i in node.keys.indices) {
            val cmpResult = compareArrayBuffers(key, node.keys[i])

            if (cmpResult <= 0)
                return Pair(cmpResult == 0, i)
        }

        return Pair(false, node.keys.size)
    }

    // A node is a leaf when every sub-node address is zero.
    fun isLeaf(node: Node) : Boolean =
        node.subNodeAddresses.all { it == 0L }

    if (node.keys.isEmpty())
        return null

    val (there, where) = locateKey(key, node)

    if (there)
        return node.datas[where]
    else if (isLeaf(node))
        return null

    val nextNode = getNodeAtAddress(field, node.subNodeAddresses[where]) ?: return null

    return bSearch(field, key, nextNode)
}