diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..dbe9c82b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.vscode/
\ No newline at end of file
diff --git a/en/tags.json b/en/tags.json
deleted file mode 100644
index c5e02bfb..00000000
--- a/en/tags.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-    "test": "test"
-}
\ No newline at end of file
diff --git a/ko.json b/ko.json
new file mode 100644
index 00000000..e69de29b
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
new file mode 100644
index 00000000..a98ae430
--- /dev/null
+++ b/scripts/requirements.txt
@@ -0,0 +1,2 @@
+requests
+beautifulsoup4
\ No newline at end of file
diff --git a/scripts/tags.py b/scripts/tags.py
new file mode 100644
index 00000000..5d9d626e
--- /dev/null
+++ b/scripts/tags.py
@@ -0,0 +1,68 @@
+"""Print every hitomi.la tag as a JSON list, most frequent first.
+
+Scrapes the ``alltags-*.html`` index pages, strips any ``prefix:`` part
+from each tag, keeps the highest count seen for each resulting name, and
+writes the names to stdout ordered by descending count.
+"""
+
+import re
+import json
+
+import requests
+from bs4 import BeautifulSoup
+
+# One index page for '123' plus one for each letter a-z.
+indices = ['123'] + [chr(ord('a') + i) for i in range(26)]
+
+# Captures the number in the trailing "(N)" of a list item's text.
+count_regex = re.compile(r".+\((\d+)\)$")
+# Captures the part of a tag that follows its ':' prefix separator.
+tag_regex = re.compile(r".+:(.+)$")
+
+
+def clean(tag):
+    """Return *tag* without its ``prefix:`` part, or unchanged if none."""
+    match = tag_regex.match(tag)
+    return match.group(1) if match else tag
+
+
+def fetch_tags(index):
+    """Return a ``{tag: count}`` dict scraped from one index page.
+
+    Raises ``requests.HTTPError`` if the page cannot be fetched.
+    """
+    url = f'https://hitomi.la/alltags-{index}.html'
+    response = requests.get(url, timeout=30)
+    # Fail loudly rather than parse an error page as an empty tag list.
+    response.raise_for_status()
+
+    soup = BeautifulSoup(response.text, 'html.parser')
+
+    found = dict()
+    for item in soup.select('.content li'):
+        match = count_regex.match(item.text.strip())
+        # Skip list items that lack a link or a trailing "(N)" count.
+        if item.a is None or match is None:
+            continue
+        found[item.a.text] = int(match.group(1))
+    return found
+
+
+def main():
+    """Collect the tags from every index page and print them as JSON."""
+    tags = dict()
+    # Visit every index page so the output really covers all tags.
+    for index in indices:
+        tags.update(fetch_tags(index))
+
+    # Several tags can clean to the same name; keep the largest count.
+    temp = dict()
+    for k, v in tags.items():
+        tag = clean(k)
+        temp[tag] = max(v, temp.get(tag, v))
+
+    print(json.dumps(sorted(temp, key=temp.get, reverse=True), indent=4))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/update.py b/scripts/update.py
new file mode 100644
index 00000000..e69de29b
diff --git a/scripts/update.sh b/scripts/update.sh
new file mode 100644
index 00000000..e69de29b
diff --git a/template.json b/template.json
new file mode 100644
index 00000000..e69de29b