From 7741502d4c16c206bcfc2424604044a8917d39ff Mon Sep 17 00:00:00 2001 From: tom5079 Date: Mon, 5 Oct 2020 19:17:33 +0900 Subject: [PATCH] Updated update.sh --- scripts/requirements.txt | 2 +- scripts/tags.py | 90 ++++++++++++++++++++-------------------- scripts/update.sh | 4 +- 3 files changed, 48 insertions(+), 48 deletions(-) diff --git a/scripts/requirements.txt b/scripts/requirements.txt index a98ae430..65617532 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -1,2 +1,2 @@ -requests +requests beautifulsoup4 \ No newline at end of file diff --git a/scripts/tags.py b/scripts/tags.py index f3e167ea..74b3be11 100644 --- a/scripts/tags.py +++ b/scripts/tags.py @@ -1,46 +1,46 @@ -# Returns a list of all tags sorted by the number of tags - -import re -import json - -import requests -from bs4 import BeautifulSoup - -indices = ['123'] + [chr(ord('a')+i) for i in range(26)] -tags = dict() - -count_regex = re.compile(r".+\((\d+)\)$") - -for index in indices: - url = f'https://hitomi.la/alltags-{index}.html' - - soup = BeautifulSoup(requests.get(url).text, 'html.parser') - - for item in soup.select('.content li'): - tag = item.a.text - count = int(count_regex.match(item.text).group(1)) - - tags[tag] = count - -tag_regex = re.compile(r".+:(.+)$") -def clean(tag): - match = tag_regex.match(tag) - - if match: - return match.group(1) - else: - return tag - -temp = dict() -for k, v in tags.items(): - tag = clean(k) - - if tag in temp: - if v > temp[tag]: - temp[tag] = v - else: - temp[tag] = v - -tags = sorted(temp, key=temp.get, reverse=True) - +# Returns a list of all tags sorted by the number of tags + +import re +import json + +import requests +from bs4 import BeautifulSoup + +indices = ['123'] + [chr(ord('a')+i) for i in range(26)] +tags = dict() + +count_regex = re.compile(r".+\((\d+)\)$") + +for index in indices: + url = f'https://hitomi.la/alltags-{index}.html' + + soup = BeautifulSoup(requests.get(url).text, 'html.parser') + + for item in soup.select('.content li'): + tag = item.a.text + count = int(count_regex.match(item.text).group(1)) + + tags[tag] = count + +tag_regex = re.compile(r".+:(.+)$") +def clean(tag): + match = tag_regex.match(tag) + + if match: + return match.group(1) + else: + return tag + +temp = dict() +for k, v in tags.items(): + tag = clean(k) + + if tag in temp: + if v > temp[tag]: + temp[tag] = v + else: + temp[tag] = v + +tags = sorted(temp, key=temp.get, reverse=True) + print(json.dumps(tags, indent=4)) \ No newline at end of file diff --git a/scripts/update.sh b/scripts/update.sh index 21d6401a..91712d50 100644 --- a/scripts/update.sh +++ b/scripts/update.sh @@ -1,7 +1,7 @@ #!/bin/bash -TAGS_PYTHON=/home/tom5079/.virtualenvs/tags/bin/python -TAGS_FOLDER=/mnt/d/Workspace/Pupil-gh-pages +TAGS_PYTHON= +TAGS_FOLDER= $TAGS_PYTHON -m pip install -r $TAGS_FOLDER/scripts/requirements.txt > NUL