Initial commit

This commit is contained in:
tom5079
2019-05-11 22:59:12 +09:00
commit 2a7814798b
134 changed files with 2735 additions and 0 deletions

1
libpupil/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/build

40
libpupil/build.gradle Normal file
View File

@@ -0,0 +1,40 @@
// Build script of the libpupil JVM library: Kotlin sources with the
// kotlinx.serialization compiler plugin applied.
apply plugin: 'java-library'
apply plugin: 'kotlin'
apply plugin: 'kotlinx-serialization'

dependencies {
    implementation fileTree(dir: 'libs', include: ['*.jar'])
    implementation "org.jetbrains.kotlin:kotlin-stdlib-jdk7:$kotlin_version"
    implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-core:1.2.1'
    implementation "org.jetbrains.kotlinx:kotlinx-serialization-runtime:0.11.0"
    implementation 'org.jsoup:jsoup:1.11.3'
    testImplementation 'junit:junit:4.12'
}

// NOTE(review): Java sources are compiled for level 7 while both Kotlin tasks
// below target JVM 1.8 — confirm that this mismatch is intended.
sourceCompatibility = "7"
targetCompatibility = "7"

buildscript {
    ext.kotlin_version = '1.3.31'
    repositories {
        mavenCentral()
    }
    dependencies {
        // One classpath entry per Gradle plugin artifact.
        classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlin_version"
        classpath "org.jetbrains.kotlin:kotlin-serialization:$kotlin_version"
    }
}

repositories {
    mavenCentral()
}

compileKotlin {
    kotlinOptions {
        jvmTarget = "1.8"
        // Lets the main sources opt in to experimental APIs via @UseExperimental.
        freeCompilerArgs += '-Xuse-experimental=kotlin.Experimental'
    }
}

compileTestKotlin {
    kotlinOptions {
        jvmTarget = "1.8"
    }
}

View File

@@ -0,0 +1,62 @@
package xyz.quaver.hitomi
// URL scheme, including the trailing colon, that this package prepends to
// scheme-relative "//host/path" URLs.
const val protocol = "https:"
/**
 * Strips the namespace from gender-qualified tags.
 *
 * For a receiver of the form `male:<term>` or `female:<term>` only `<term>`
 * (the text between the first and the second `:`) is returned. Every other
 * string is returned unchanged.
 */
fun String.toTag() : String {
    if (':' !in this)
        return this

    val parts = split(':')
    return when (parts[0]) {
        "male", "female" -> parts[1]
        else -> this
    }
}
//common.js
// NOTE(review): these values presumably mirror hitomi.la's common.js script — confirm against upstream.
// While true, subdomainFromGalleryID always returns "0".
var adapose = false
// Modulus applied to the gallery ID in subdomainFromGalleryID to choose the subdomain letter.
const val numberOfFrontends = 2
// Host used to build the nozomi, gallery block and index URLs of this package.
const val domain = "ltn.hitomi.la"
// Path segment below the host from which getGalleryBlock loads gallery block pages.
const val galleryblockdir = "galleryblock"
// File extension appended to nozomi URLs.
const val nozomiextension = ".nozomi"
/**
 * Picks the subdomain prefix for the number [g].
 *
 * Returns `"0"` while [adapose] is set. Otherwise the result is the character
 * `'a'` shifted by `g % numberOfFrontends`, as a one-character string.
 */
fun subdomainFromGalleryID(g: Int) : String {
    return if (adapose) {
        "0"
    } else {
        val offset = g % numberOfFrontends
        // 'a' has code 97, so this is the same character as (97 + offset).
        ('a' + offset).toString()
    }
}
/**
 * Derives a subdomain from the last digit of the first all-digit path segment
 * (`/<digits>/`) found in [url].
 *
 * @param url URL to inspect
 * @param base suffix of the result; `"a"` is used when it is `null`
 * @return the suffix alone when [url] contains no such segment, otherwise
 *         `subdomainFromGalleryID(digit)` followed by the suffix, where a digit
 *         of 1 is treated as 0
 */
fun subdomainFromURL(url: String, base: String? = null) : String {
    val suffix = base ?: "a"

    val lastDigit = Regex("""/\d*(\d)/""")
        .find(url)
        ?.groupValues
        ?.get(1)
        ?.toIntOrNull()
        ?: return suffix

    val frontend = if (lastDigit == 1) 0 else lastDigit
    return subdomainFromGalleryID(frontend) + suffix
}
/**
 * Rewrites every `//<one or two characters>.hitomi.la/` host part of [url] so
 * that it uses the subdomain computed by [subdomainFromURL] for the same [url]
 * and [base].
 */
fun urlFromURL(url: String, base: String? = null) : String {
    val hostPattern = Regex("//..?\\.hitomi\\.la/")
    return hostPattern.replace(url, "//${subdomainFromURL(url, base)}.hitomi.la/")
}

View File

@@ -0,0 +1,3 @@
package xyz.quaver.hitomi
//download.js

View File

@@ -0,0 +1,61 @@
package xyz.quaver.hitomi
import org.jsoup.Jsoup
import java.net.URL
/**
 * Data scraped from a gallery page by [getGallery].
 *
 * @property related every integer found in the page's first `<script>` element
 *           (presumably the IDs of related galleries — TODO confirm)
 * @property langList one pair per `#lang-list a` link: its text and its `href` with `.html` removed
 * @property cover URL built from [protocol] and the `src` of the `.cover img` element
 * @property title text of the `.gallery h1 a` element
 * @property artists texts of the `.gallery h2 a` links
 * @property groups texts of the `.gallery-info` links whose href starts with `/group/`
 * @property type text of the first `.gallery-info` link whose href starts with `/type/`
 * @property language name cut out of the href of the `/index-<language>-1.html` link
 * @property series texts of the `.gallery-info` links whose href starts with `/series/`
 * @property characters texts of the `.gallery-info` links whose href starts with `/character/`
 * @property tags names cut out of the hrefs of the `.gallery-info` links that start with `/tag/`
 * @property thumbnails thumbnail URLs matched in the page's last `<script>` element
 */
data class Gallery(
val related: List<Int>,
val langList: List<Pair<String, String>>,
val cover: URL,
val title: String,
val artists: List<String>,
val groups: List<String>,
val type: String,
val language: String,
val series: List<String>,
val characters: List<String>,
val tags: List<String>,
val thumbnails: List<URL>
)
/**
 * Downloads `https://hitomi.la/galleries/<galleryID>.html` and scrapes it into
 * a [Gallery].
 *
 * @param galleryID numeric ID inserted into the page URL
 * @return the scraped gallery data
 * @throws java.io.IOException when Jsoup fails to fetch the page
 * @throws NullPointerException when an element this function requires
 *         (first script, cover image, title link, type link) is missing
 */
fun getGallery(galleryID: Int) : Gallery {
    val url = "https://hitomi.la/galleries/$galleryID.html"
    val doc = Jsoup.connect(url).get()

    // Every integer literal in the first <script> element ends up in `related`.
    val related = Regex("\\d+")
        .findAll(doc.select("script").first().html())
        .map { it.value.toInt() }
        .toList()

    val langList = doc.select("#lang-list a").map {
        Pair(it.text(), it.attr("href").replace(".html", ""))
    }

    // The src attribute is scheme-relative, hence the protocol prefix.
    val cover = URL(protocol + doc.selectFirst(".cover img").attr("src"))
    val title = doc.selectFirst(".gallery h1 a").text()
    val artists = doc.select(".gallery h2 a").map { it.text() }
    val groups = doc.select(".gallery-info a[href~=^/group/]").map { it.text() }
    val type = doc.selectFirst(".gallery-info a[href~=^/type/]").text()

    // href has the form "/index-<language>-1.html"; 7 == "/index-".length.
    // With no matching link the href is empty and so is the language.
    val languageHref = doc.select(".gallery-info a[href~=^/index-.+-1.html]").attr("href")
    val language = languageHref.slice(7 until languageHref.indexOf("-1"))

    val series = doc.select(".gallery-info a[href~=^/series/]").map { it.text() }
    val characters = doc.select(".gallery-info a[href~=^/character/]").map { it.text() }

    val tags = doc.select(".gallery-info a[href~=^/tag/]").map {
        // Decode the href before cutting out the name, exactly as getGalleryBlock
        // does for its related tags, so that percent-encoded characters (such as
        // the ':' of "female:...") do not leak into the tag names.
        val href = java.net.URLDecoder.decode(it.attr("href"), "UTF-8")
        // 5 == "/tag/".length; the name ends at the first '-'.
        href.slice(5 until href.indexOf('-'))
    }

    val thumbnails = Regex("'(//tn.hitomi.la/smalltn/\\d+/\\d+.+)',")
        .findAll(doc.select("script").last().html())
        .map { URL(protocol + it.groupValues[1]) }
        .toList()

    return Gallery(related, langList, cover, title, artists, groups, type, language, series, characters, tags, thumbnails)
}

View File

@@ -0,0 +1,79 @@
package xyz.quaver.hitomi
import org.jsoup.Jsoup
import java.net.URL
import java.net.URLDecoder
import java.nio.ByteBuffer
import java.nio.ByteOrder
import java.util.*
import javax.net.ssl.HttpsURLConnection
//galleryblock.js
/**
 * Downloads a nozomi file and decodes it as a sequence of big-endian 32-bit
 * integers.
 *
 * The file is `$tag-$language$nozomiextension` on [domain], below the [area]
 * directory when one is given.
 *
 * @param area optional directory segment; left out of the URL when `null`
 * @param tag first half of the file name
 * @param language second half of the file name
 * @param start index of the first integer to request
 * @param count number of integers to request; an HTTP `Range` header is only
 *        sent when both [start] and [count] differ from -1
 * @return the decoded integers, or an empty list when anything goes wrong
 *         (the failure is deliberately swallowed)
 */
fun fetchNozomi(area: String? = null, tag: String = "index", language: String = "all", start: Int = -1, count: Int = -1) : List<Int> {
    val url =
        when(area) {
            null -> "$protocol//$domain/$tag-$language$nozomiextension"
            else -> "$protocol//$domain/$area/$tag-$language$nozomiextension"
        }

    return try {
        val connection = URL(url).openConnection() as HttpsURLConnection
        connection.requestMethod = "GET"

        if (start != -1 && count != -1) {
            // Four bytes per entry; Long arithmetic keeps large indices from overflowing.
            val startByte = start.toLong() * 4
            val endByte = (start.toLong() + count) * 4 - 1
            connection.setRequestProperty("Range", "bytes=$startByte-$endByte")
        }

        // `use` closes the response stream even when reading fails.
        val bytes = connection.inputStream.use { it.readBytes() }
        val buffer = ByteBuffer
            .wrap(bytes)
            .order(ByteOrder.BIG_ENDIAN)

        val nozomi = ArrayList<Int>(bytes.size / 4)
        while (buffer.hasRemaining())
            nozomi.add(buffer.int)
        nozomi
    } catch (e: Exception) {
        emptyList<Int>()
    }
}
/**
 * Data scraped from a gallery block page by [getGalleryBlock].
 *
 * @property id the gallery ID the block was requested for
 * @property thumbnails URLs built from [protocol] and the `data-src` of every `img` element
 * @property title text of the `h1.lillie > a` element
 * @property artists texts of the `div.artist-list a` links
 * @property series texts of the links whose href starts with `/series/`
 * @property type text of the first link whose href starts with `/type/`
 * @property language name cut out of the href of the `/index-<language>-1.html` link
 * @property relatedTags names cut out of the URL-decoded hrefs of the `.relatedtags a` links
 */
data class GalleryBlock(
val id: Int,
val thumbnails: List<URL>,
val title: String,
val artists: List<String>,
val series: List<String>,
val type: String,
val language: String,
val relatedTags: List<String>
)
/**
 * Downloads the gallery block page of [galleryID] from [domain] and scrapes it
 * into a [GalleryBlock].
 *
 * @param galleryID numeric ID inserted into the page URL and stored as the block's `id`
 * @return the scraped block
 * @throws java.io.IOException when Jsoup fails to fetch the page
 * @throws NullPointerException when the title link or the type link is missing
 */
fun getGalleryBlock(galleryID: Int) : GalleryBlock {
    val page = Jsoup.connect("$protocol//$domain/$galleryblockdir/$galleryID$extension").get()

    // data-src is scheme-relative, hence the protocol prefix.
    val thumbnailUrls = page.select("img").map { img -> URL(protocol + img.attr("data-src")) }
    val blockTitle = page.selectFirst("h1.lillie > a").text()
    val artistNames = page.select("div.artist-list a").map { link -> link.text() }
    val seriesNames = page.select("a[href~=^/series/]").map { link -> link.text() }
    val typeName = page.selectFirst("a[href~=^/type/]").text()

    // href has the form "/index-<language>-1.html"; 7 == "/index-".length.
    val languageHref = page.select("a[href~=^/index-.+-1.html]").attr("href")
    val languageName = languageHref.slice(7 until languageHref.indexOf("-1"))

    val tagNames = page.select(".relatedtags a").map { link ->
        val decoded = URLDecoder.decode(link.attr("href"), "UTF-8")
        // 5 == "/tag/".length; the name ends at the first '-'.
        decoded.slice(5 until decoded.indexOf('-'))
    }

    return GalleryBlock(
        id = galleryID,
        thumbnails = thumbnailUrls,
        title = blockTitle,
        artists = artistNames,
        series = seriesNames,
        type = typeName,
        language = languageName,
        relatedTags = tagNames
    )
}

View File

@@ -0,0 +1,50 @@
package xyz.quaver.hitomi
import kotlinx.serialization.ImplicitReflectionSerializer
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.Json
import kotlinx.serialization.parseList
import org.jsoup.Jsoup
import java.net.URL
/**
 * Builds the reader page URL of [galleryID].
 *
 * @return `https://hitomi.la/reader/<galleryID>.html`
 */
fun getReferer(galleryID: Int) : String {
    return "https://hitomi.la/reader/$galleryID.html"
}
/**
 * One element of the JSON array that [getReader] extracts from the
 * `galleries/<id>.js` script.
 *
 * NOTE(review): the field meanings are not visible in this file — presumably
 * the image's pixel dimensions, its file name and a WebP-availability flag;
 * confirm against the served data.
 */
@Serializable
data class GalleryInfo(
val width: Int,
val haswebp: Int,
val name: String,
val height: Int
)
/**
 * Result of [getReader].
 *
 * @property title text of the reader page's `<title>` element
 * @property images image URLs paired by position with their [GalleryInfo];
 *           the info is `null` for images beyond the end of the parsed info list
 */
data class Reader(
val title: String,
val images: List<Pair<URL, GalleryInfo?>>
)
//Set header `Referer` to reader url to avoid 403 error
/**
 * Scrapes the reader page of [galleryID] and pairs each image URL with the
 * matching entry of the gallery's info script.
 *
 * Image URLs are the texts of the `.img-url` elements, rewritten by
 * [urlFromURL] and prefixed with [protocol]. The info list is the first
 * `[...]` span of `https://ltn.hitomi.la/galleries/<galleryID>.js` parsed as
 * JSON (an empty list when there is no such span) and padded with `null` up
 * to the number of images.
 *
 * @param galleryID numeric ID inserted into both URLs
 * @return the page title and the (URL, info) pairs
 */
@UseExperimental(ImplicitReflectionSerializer::class)
fun getReader(galleryID: Int) : Reader {
    val page = Jsoup.connect("https://hitomi.la/reader/$galleryID.html").get()
    val pageTitle = page.selectFirst("title").text()
    val imageUrls = page.select(".img-url").map { element ->
        URL(protocol + urlFromURL(element.text()))
    }

    val infoScript = URL("https://ltn.hitomi.la/galleries/$galleryID.js").readText()
    val infoJson = Regex("""\[.+\]""").find(infoScript)?.value ?: "[]"

    val infos = ArrayList<GalleryInfo?>(Json.parseList<GalleryInfo>(infoJson))
    // repeat() does nothing for a non-positive count, so the list is only ever padded.
    repeat(imageUrls.size - infos.size) { infos.add(null) }

    return Reader(pageTitle, imageUrls zip infos)
}

View File

@@ -0,0 +1,73 @@
package xyz.quaver.hitomi
import kotlinx.coroutines.asCoroutineDispatcher
import kotlinx.coroutines.launch
import kotlinx.coroutines.runBlocking
import java.util.*
import java.util.concurrent.Executors
// Coroutine dispatcher backed by a fixed pool of four threads; doSearch launches its per-term lookups on it.
// NOTE(review): nothing in this file shuts the executor down — confirm that its threads cannot keep the JVM alive.
val searchDispatcher = Executors.newFixedThreadPool(4).asCoroutineDispatcher()
/**
 * Runs a multi-term search and returns the IDs of the matching galleries.
 *
 * [query] is trimmed, stripped of one leading `?`, lower-cased and split on
 * whitespace. A term starting with `-` (and at least one more character) is a
 * negative term; every other term is positive. Inside a term `_` stands for a
 * space, so it is only replaced after the split. Otherwise a term such as
 * `female:mind_break` would be torn into two terms.
 *
 * The result starts as the ID list of one positive term (or of the complete
 * index when there is none, e.g. for a blank query). It is then intersected
 * with the lists of the remaining positive terms and stripped of the IDs of
 * every negative term. The per-term lookups run concurrently on
 * [searchDispatcher].
 *
 * @param query raw search string
 * @return the remaining IDs, in the order of the list the result started from
 */
fun doSearch(query: String) : List<Int> {
    val terms = query
        .trim()
        .replace(Regex("""^\?"""), "")
        .toLowerCase()
        .split(Regex("\\s+"))
        // A blank query splits into a single empty term; drop it.
        .filter { it.isNotEmpty() }
        .map { it.replace('_', ' ') }

    val (negated, positiveTerms) = terms.partition { it.length > 1 && it.startsWith('-') }
    val negativeTerms = negated.map { it.removePrefix("-") }

    // Intersection does not depend on order, so any positive term can seed the
    // result; the last one is used.
    val seedTerm = positiveTerms.lastOrNull()
    val results = ArrayList<Int>(
        if (seedTerm == null)
            getGalleryIDsFromNozomi(null, "index", "all")
        else
            getGalleryIDsForQuery(seedTerm)
    )

    // Guards `results`, which is mutated from the dispatcher's worker threads.
    val lock = Any()

    runBlocking {
        // Positive terms: keep only the IDs present in every list. Hash sets
        // make the membership test independent of the order of the fetched lists.
        positiveTerms.dropLast(1).map { term ->
            launch(searchDispatcher) {
                val matching = getGalleryIDsForQuery(term).toHashSet()
                synchronized(lock) { results.retainAll(matching) }
            }
        }.forEach {
            it.join()
        }

        // Negative terms: drop every ID present in any of their lists.
        negativeTerms.map { term ->
            launch(searchDispatcher) {
                val excluded = getGalleryIDsForQuery(term).toHashSet()
                synchronized(lock) { results.removeAll(excluded) }
            }
        }.forEach {
            it.join()
        }
    }

    return results
}

View File

@@ -0,0 +1,317 @@
package xyz.quaver.hitomi
import java.net.URL
import java.nio.ByteBuffer
import java.nio.ByteOrder
import java.security.MessageDigest
import javax.net.ssl.HttpsURLConnection
//searchlib.js
// String placed between the parts of the result paths generated in getSuggestionsFromData.
const val separator = "-"
// Suffix of the HTML page paths and URLs built in this package.
const val extension = ".html"
// Directory on the server that holds the per-field tag index and data files.
const val index_dir = "tagindex"
// Directory on the server that holds the galleries index and data files.
const val galleries_index_dir = "galleriesindex"
// Number of bytes getNodeAtAddress requests for one index node.
const val max_node_size = 464
// decodeNode reads B + 1 sub-node addresses per node.
const val B = 16
// Path segment that getGalleryIDsFromNozomi inserts right after the host.
const val compressed_nozomi_prefix = "n"
// Version strings of the tag index and the galleries index; they are embedded
// in the names of the index and data files requested by this file.
// Each one is downloaded on first access instead of during file initialisation,
// so that merely touching this file performs no network I/O and a failed
// download is retried on the next access rather than breaking every function here.
val tag_index_version: String by lazy { getIndexVersion("tagindex") }
val galleries_index_version: String by lazy { getIndexVersion("galleriesindex") }
/**
 * Computes the SHA-256 digest of [data].
 *
 * @return the 32 digest bytes
 */
fun sha256(data: ByteArray) : ByteArray {
    val digest = MessageDigest.getInstance("SHA-256")
    digest.update(data)
    return digest.digest()
}
/**
 * Hashes a search term into an index key: the first four bytes of the SHA-256
 * digest of [term], as unsigned bytes.
 */
@UseExperimental(ExperimentalUnsignedTypes::class)
fun hashTerm(term: String) : UByteArray {
    val digest = sha256(term.toByteArray())
    return digest.copyOfRange(0, 4).toUByteArray()
}
/**
 * Removes every `/` and `#` character from [input].
 */
fun sanitize(input: String) : String {
    val forbidden = Regex("[/#]")
    return forbidden.replace(input, "")
}
/**
 * Downloads the `version` file of the index directory [name] on [domain] and
 * returns its text.
 *
 * The current time in milliseconds is appended as the `_` query parameter.
 */
fun getIndexVersion(name: String) : String =
    URL("$protocol//$domain/$name/version?_=${System.currentTimeMillis()}").readText()
//search.js
/**
 * Resolves a single search term to gallery IDs.
 *
 * Underscores in [query] are read as spaces. A term containing `:` is a
 * `namespace:value` pair and is answered from a nozomi file:
 * - `female:` / `male:` use the area `tag` with the whole term as tag,
 * - `language:` uses the `index` file of that language,
 * - any other namespace is used as the area, with the value as tag.
 *
 * A term without `:` is hashed with [hashTerm] and looked up in the
 * `galleries` index.
 *
 * @return the matching IDs; empty when the root node cannot be loaded or the
 *         key is not in the index
 */
fun getGalleryIDsForQuery(query: String) : List<Int> {
    val normalized = query.replace("_", " ")

    if (normalized.indexOf(':') > -1) {
        val sides = normalized.split(":")
        val ns = sides[0]
        val value = sides[1]

        return when (ns) {
            "female", "male" -> getGalleryIDsFromNozomi("tag", normalized, "all")
            "language" -> getGalleryIDsFromNozomi(null, "index", value)
            else -> getGalleryIDsFromNozomi(ns, value, "all")
        }
    }

    val key = hashTerm(normalized)
    val field = "galleries"
    val root = getNodeAtAddress(field, 0) ?: return listOf()
    val location = bSearch(field, key, root) ?: return arrayListOf()
    return getGalleryIDsFromData(location)
}
/**
 * Looks up suggestions for a search term.
 *
 * Underscores in [query] are read as spaces. For `field:term` the text before
 * the first `:` selects the index and the text between the first and second
 * `:` is looked up; without a `:` the whole query is looked up in the `global`
 * index.
 *
 * @return the decoded suggestions; empty when the root node cannot be loaded
 *         or the key is not in the index
 */
fun getSuggestionsForQuery(query: String) : List<Suggestion> {
    val normalized = query.replace('_', ' ')

    val (field, term) =
        if (normalized.indexOf(':') > -1)
            normalized.split(':').let { sides -> sides[0] to sides[1] }
        else
            "global" to normalized

    val key = hashTerm(term)
    val root = getNodeAtAddress(field, 0) ?: return listOf()
    val location = bSearch(field, key, root) ?: return listOf()
    return getSuggestionsFromData(field, location)
}
/**
 * One suggestion decoded by [getSuggestionsFromData].
 *
 * @property s tag text exactly as read from the data file
 * @property t integer read right after the tag (held in a local named `count`;
 *           presumably the number of matching galleries — TODO confirm)
 * @property u relative result path generated from the namespace and the sanitized tag
 * @property n namespace text read before the tag
 */
data class Suggestion(val s: String, val t: Int, val u: String, val n: String)
/**
 * Downloads and decodes one suggestion record of the tag data file of [field].
 *
 * The record is a big-endian stream: a suggestion count, then per suggestion a
 * length-prefixed UTF-8 namespace, a length-prefixed UTF-8 tag and a 32-bit
 * count.
 *
 * @param field index name used in the data file's URL
 * @param data (offset, length) of the record inside the data file
 * @return the decoded suggestions, or an empty list when the download fails
 * @throws Exception when the length is outside 1..10000 or the suggestion
 *         count is outside 1..100
 */
fun getSuggestionsFromData(field: String, data: Pair<Long, Int>) : List<Suggestion> {
    val url = "$protocol//$domain/$index_dir/$field.$tag_index_version.data"
    val offset = data.first
    val length = data.second

    if (length > 10000 || length <= 0)
        throw Exception("length $length is too long")

    val inbuf = getURLAtRange(url, offset.until(offset+length)) ?: return listOf()
    val buffer = ByteBuffer
        .wrap(inbuf)
        .order(ByteOrder.BIG_ENDIAN)

    val numberOfSuggestions = buffer.int
    if (numberOfSuggestions > 100 || numberOfSuggestions <= 0)
        throw Exception("number of suggestions $numberOfSuggestions is too long")

    // Reads a 32-bit size followed by that many UTF-8 bytes, then moves the
    // read position past them.
    fun readString() : String {
        val size = buffer.int
        val begin = buffer.position()
        val end = begin + size
        val text = inbuf.sliceArray(begin.until(end)).toString(charset("UTF-8"))
        buffer.position(end)
        return text
    }

    return (0 until numberOfSuggestions).map {
        val ns = readString()
        val tag = readString()
        val count = buffer.int
        val tagname = sanitize(tag)

        val u =
            when(ns) {
                "female", "male" -> "/tag/$ns:$tagname${separator}1$extension"
                "language" -> "/index-$tagname${separator}1$extension"
                else -> "/$ns/$tagname${separator}all${separator}1$extension"
            }

        Suggestion(tag, count, u, ns)
    }
}
/**
 * Downloads a complete nozomi file from the [compressed_nozomi_prefix]
 * directory on [domain] and decodes it as big-endian 32-bit gallery IDs.
 *
 * @param area optional directory segment; left out of the URL when `null`
 * @param tag first half of the file name
 * @param language second half of the file name
 * @return the decoded IDs, or an empty list when anything goes wrong
 *         (the failure is deliberately swallowed)
 */
fun getGalleryIDsFromNozomi(area: String?, tag: String, language: String) : List<Int> {
    val nozomiAddress =
        when(area) {
            null -> "$protocol//$domain/$compressed_nozomi_prefix/$tag-$language$nozomiextension"
            else -> "$protocol//$domain/$compressed_nozomi_prefix/$area/$tag-$language$nozomiextension"
        }

    return try {
        val connection = URL(nozomiAddress).openConnection() as HttpsURLConnection
        connection.requestMethod = "GET"

        // `use` closes the response stream even when reading fails.
        val bytes = connection.inputStream.use { it.readBytes() }
        val buffer = ByteBuffer
            .wrap(bytes)
            .order(ByteOrder.BIG_ENDIAN)

        val nozomi = ArrayList<Int>(bytes.size / 4)
        while (buffer.hasRemaining())
            nozomi.add(buffer.int)
        nozomi
    } catch (e: Exception) {
        emptyList<Int>()
    }
}
/**
 * Downloads and decodes one gallery ID record of the galleries data file.
 *
 * The record is a big-endian stream: a 32-bit ID count followed by that many
 * 32-bit IDs.
 *
 * @param data (offset, length) of the record inside the data file
 * @return the decoded IDs, or an empty list when the download fails
 * @throws Exception when the length is outside 1..100000000, the ID count is
 *         outside 1..10000000, or the record size does not match the count
 */
fun getGalleryIDsFromData(data: Pair<Long, Int>) : List<Int> {
    val url = "$protocol//$domain/$galleries_index_dir/galleries.$galleries_index_version.data"
    val offset = data.first
    val length = data.second

    if (length > 100000000 || length <= 0)
        throw Exception("length $length is too long")

    val inbuf = getURLAtRange(url, offset.until(offset+length)) ?: return listOf()
    val buffer = ByteBuffer
        .wrap(inbuf)
        .order(ByteOrder.BIG_ENDIAN)

    val numberOfGalleryIDs = buffer.int
    // Four bytes for the count itself plus four per ID.
    val expectedLength = numberOfGalleryIDs*4+4

    when {
        numberOfGalleryIDs > 10000000 || numberOfGalleryIDs <= 0 ->
            throw Exception("number_of_galleryids $numberOfGalleryIDs is too long")
        inbuf.size != expectedLength ->
            throw Exception("inbuf.byteLength ${inbuf.size} != expected_length $expectedLength")
    }

    return (0 until numberOfGalleryIDs).map { buffer.int }
}
/**
 * Downloads and decodes the index node that starts at byte [address].
 *
 * The field `galleries` reads from the galleries index file; every other field
 * reads from that field's tag index file. [max_node_size] bytes are requested.
 *
 * @return the decoded node, or `null` when the download fails
 */
fun getNodeAtAddress(field: String, address: Long) : Node? {
    val url =
        if (field == "galleries")
            "$protocol//$domain/$galleries_index_dir/galleries.$galleries_index_version.index"
        else
            "$protocol//$domain/$index_dir/$field.$tag_index_version.index"

    return getURLAtRange(url, address until address + max_node_size)?.let { nodedata ->
        decodeNode(nodedata)
    }
}
/**
 * Downloads the bytes of [url] in the inclusive byte range [range] with an
 * HTTP `Range` request.
 *
 * @param url HTTPS URL to read from
 * @param range first and last byte to request, both inclusive
 * @return the response body, or `null` when anything goes wrong
 *         (the failure is deliberately swallowed)
 */
fun getURLAtRange(url: String, range: LongRange) : ByteArray? {
    return try {
        val connection = URL(url).openConnection() as HttpsURLConnection
        connection.requestMethod = "GET"
        connection.setRequestProperty("Range", "bytes=${range.first}-${range.last}")

        // `use` closes the response stream even when reading fails.
        connection.inputStream.use { it.readBytes() }
    } catch (e: Exception) {
        null
    }
}
/**
 * One decoded index node, as produced by [decodeNode].
 *
 * @property keys raw key bytes, in the order they were read
 * @property datas (offset, length) pairs read after the keys; bSearch returns
 *           the entry at the position of the matching key
 * @property subNodeAddresses B + 1 addresses read after the data entries;
 *           bSearch treats a node whose addresses are all 0 as a leaf
 */
@UseExperimental(ExperimentalUnsignedTypes::class)
data class Node(val keys: List<UByteArray>, val datas: List<Pair<Long, Int>>, val subNodeAddresses: List<Long>)
/**
 * Decodes the big-endian binary form of an index node.
 *
 * Layout, in reading order: a 32-bit key count; per key a 32-bit size followed
 * by that many key bytes; a 32-bit data count; per data entry a 64-bit offset
 * and a 32-bit length; finally `B + 1` 64-bit sub-node addresses.
 *
 * @param data raw node bytes
 * @return the decoded node
 * @throws Exception when a key size is not in 1..32
 * @throws java.nio.BufferUnderflowException when [data] ends before the node does
 */
@UseExperimental(ExperimentalUnsignedTypes::class)
fun decodeNode(data: ByteArray) : Node {
    val buffer = ByteBuffer
        .wrap(data)
        .order(ByteOrder.BIG_ENDIAN)
    val uData = data.toUByteArray()

    val numberOfKeys = buffer.int
    val keys = (0 until numberOfKeys).map {
        val keySize = buffer.int
        // Negative sizes are rejected as well: they would produce an empty key
        // and move the read position backwards.
        if (keySize <= 0 || keySize > 32)
            throw Exception("fatal: !keySize || keySize > 32")

        val begin = buffer.position()
        val end = begin + keySize
        buffer.position(end)
        uData.sliceArray(begin until end)
    }

    val numberOfDatas = buffer.int
    val datas = (0 until numberOfDatas).map {
        val offset = buffer.long
        val length = buffer.int
        Pair(offset, length)
    }

    val numberOfSubNodeAddresses = B+1
    val subNodeAddresses = (0 until numberOfSubNodeAddresses).map { buffer.long }

    return Node(keys, datas, subNodeAddresses)
}
/**
 * Searches the index of [field] for [key], starting at [node] and descending
 * into sub-nodes loaded with [getNodeAtAddress].
 *
 * @param field index name handed on to [getNodeAtAddress]
 * @param key key bytes to look for
 * @param node node at which the search starts
 * @return the (offset, length) entry stored for the key, or `null` when the
 *         node has no keys, the key is not found in a leaf, or a sub-node
 *         cannot be loaded
 */
@UseExperimental(ExperimentalUnsignedTypes::class)
fun bSearch(field: String, key: UByteArray, node: Node) : Pair<Long, Int>? {
    // Compares the two arrays byte by byte over their common length only;
    // arrays that agree on that prefix compare as equal.
    fun comparePrefix(left: UByteArray, right: UByteArray) : Int {
        for (i in 0 until minOf(left.size, right.size)) {
            val diff = left[i].compareTo(right[i])
            if (diff != 0)
                return if (diff < 0) -1 else 1
        }
        return 0
    }

    // Position of the first key that is not smaller than the searched key,
    // together with whether it is an exact match.
    fun locate() : Pair<Boolean, Int> {
        node.keys.forEachIndexed { index, candidate ->
            val cmp = comparePrefix(key, candidate)
            if (cmp <= 0)
                return Pair(cmp == 0, index)
        }
        return Pair(false, node.keys.size)
    }

    if (node.keys.isEmpty())
        return null

    val (found, index) = locate()
    if (found)
        return node.datas[index]

    // A node without any sub-node address is a leaf: nothing left to search.
    val leaf = node.subNodeAddresses.all { it == 0L }
    if (leaf)
        return null

    val child = getNodeAtAddress(field, node.subNodeAddresses[index]) ?: return null
    return bSearch(field, key, child)
}

View File

@@ -0,0 +1,64 @@
package xyz.quaver.hitomi
import org.junit.Test
import java.net.URL
/**
 * Smoke tests that call the scraping and search functions and print what they
 * return.
 *
 * NOTE(review): apart from [test], every case performs live HTTP requests, and
 * none of them asserts on the output — they only fail when a call throws.
 */
class UnitTest {
    /** Prints the file name part of a gallery script URL. */
    @Test
    fun test() {
        val url = URL("https://ltn.hitomi.la/galleries/1411672.js")
        print(url.path.substring(url.path.lastIndexOf('/')+1))
    }

    /** Prints the first five entries of the default nozomi file. */
    @Test
    fun test_nozomi() {
        val nozomi = fetchNozomi(start = 0, count = 5)
        for (n in nozomi)
            println(n)
    }

    /** Prints up to 101 IDs from the end of a tag's ID list. */
    @Test
    fun test_search() {
        val ids = getGalleryIDsForQuery("female:loli").reversed()
        // take() copes with fewer than 101 results, where indexing would throw.
        ids.take(101).forEach { println(it) }
    }

    /** Prints the suggestions for a namespaced prefix. */
    @Test
    fun test_suggestions() {
        val suggestions = getSuggestionsForQuery("language:g")
        print(suggestions)
    }

    /** Prints the number of results of a query with positive and negative terms. */
    @Test
    fun test_doSearch() {
        val r = doSearch("type:artistcg language:korean female:loli female:mind_break -female:anal")
        print(r.size)
    }

    /** Prints one scraped gallery block. */
    @Test
    fun test_getBlock() {
        val galleryBlock = getGalleryBlock(1405716)
        print(galleryBlock)
    }

    /** Prints one scraped gallery page. */
    @Test
    fun test_getGallery() {
        val gallery = getGallery(1405267)
        print(gallery)
    }

    /** Prints one scraped reader page. */
    @Test
    fun test_getReader() {
        val reader = getReader(1404693)
        print(reader)
    }
}