Updated update.sh

This commit is contained in:
tom5079
2020-10-05 19:17:33 +09:00
parent 6296108000
commit 7741502d4c
3 changed files with 48 additions and 48 deletions

View File

@@ -1,2 +1,2 @@
requests requests
beautifulsoup4 beautifulsoup4

View File

@@ -1,46 +1,46 @@
# Returns a list of all tags sorted by the number of tags # Returns a list of all tags sorted by the number of tags
import re import re
import json import json
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
# Suffixes of the alltags-<index>.html pages to fetch: '123' plus each
# letter from 'a' to 'z'.
indices = ['123'] + [chr(ord('a') + i) for i in range(26)]

# Maps each tag's link text to the count shown next to it.
tags = dict()

# Captures the digits of a trailing "(N)" in a list item's text.
count_regex = re.compile(r".+\((\d+)\)$")

for index in indices:
    url = f'https://hitomi.la/alltags-{index}.html'

    # A timeout keeps one stalled connection from hanging the whole run, and
    # raise_for_status() prevents an HTTP error page from being parsed as if
    # it were a (tag-less) listing.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    for item in soup.select('.content li'):
        count_match = count_regex.match(item.text)
        # Skip list items that have no link or no trailing "(N)" rather than
        # failing with AttributeError on None.
        if item.a is None or count_match is None:
            continue
        tags[item.a.text] = int(count_match.group(1))
# Matches "<prefix>:<name>" with at least one character on each side of the
# colon, capturing <name>. The leading ".+" is greedy, so when several colons
# qualify the split happens at the last one.
tag_regex = re.compile(r".+:(.+)$")


def clean(tag: str) -> str:
    """Return *tag* with its leading "<prefix>:" part removed.

    A tag that does not match ``tag_regex`` (no colon, or nothing before or
    after the colon) is returned unchanged.
    """
    match = tag_regex.match(tag)
    return match.group(1) if match else tag
# Several raw tags can map to the same cleaned name; keep the largest count
# seen for each cleaned name.
temp = dict()
for k, v in tags.items():
    tag = clean(k)
    # temp.get(tag, v) falls back to v for a name not seen before, so the
    # first occurrence stores v and later ones only replace a smaller count.
    temp[tag] = max(v, temp.get(tag, v))

# Cleaned tag names ordered from highest to lowest count, printed as a JSON
# array.
tags = sorted(temp, key=temp.get, reverse=True)
print(json.dumps(tags, indent=4))

View File

@@ -1,7 +1,7 @@
#!/bin/bash #!/bin/bash
TAGS_PYTHON=/home/tom5079/.virtualenvs/tags/bin/python TAGS_PYTHON=
TAGS_FOLDER=/mnt/d/Workspace/Pupil-gh-pages TAGS_FOLDER=
$TAGS_PYTHON -m pip install -r $TAGS_FOLDER/scripts/requirements.txt > NUL $TAGS_PYTHON -m pip install -r $TAGS_FOLDER/scripts/requirements.txt > NUL