Updated update.sh
This commit is contained in:
@@ -1,2 +1,2 @@
|
|||||||
requests
|
requests
|
||||||
beautifulsoup4
|
beautifulsoup4
|
||||||
@@ -1,46 +1,46 @@
|
|||||||
# Returns a list of all tags sorted by the number of tags
|
# Returns a list of all tags sorted by the number of tags
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
indices = ['123'] + [chr(ord('a')+i) for i in range(26)]
|
indices = ['123'] + [chr(ord('a')+i) for i in range(26)]
|
||||||
tags = dict()
|
tags = dict()
|
||||||
|
|
||||||
count_regex = re.compile(r".+\((\d+)\)$")
|
count_regex = re.compile(r".+\((\d+)\)$")
|
||||||
|
|
||||||
for index in indices:
|
for index in indices:
|
||||||
url = f'https://hitomi.la/alltags-{index}.html'
|
url = f'https://hitomi.la/alltags-{index}.html'
|
||||||
|
|
||||||
soup = BeautifulSoup(requests.get(url).text, 'html.parser')
|
soup = BeautifulSoup(requests.get(url).text, 'html.parser')
|
||||||
|
|
||||||
for item in soup.select('.content li'):
|
for item in soup.select('.content li'):
|
||||||
tag = item.a.text
|
tag = item.a.text
|
||||||
count = int(count_regex.match(item.text).group(1))
|
count = int(count_regex.match(item.text).group(1))
|
||||||
|
|
||||||
tags[tag] = count
|
tags[tag] = count
|
||||||
|
|
||||||
tag_regex = re.compile(r".+:(.+)$")
|
tag_regex = re.compile(r".+:(.+)$")
|
||||||
def clean(tag):
|
def clean(tag):
|
||||||
match = tag_regex.match(tag)
|
match = tag_regex.match(tag)
|
||||||
|
|
||||||
if match:
|
if match:
|
||||||
return match.group(1)
|
return match.group(1)
|
||||||
else:
|
else:
|
||||||
return tag
|
return tag
|
||||||
|
|
||||||
temp = dict()
|
temp = dict()
|
||||||
for k, v in tags.items():
|
for k, v in tags.items():
|
||||||
tag = clean(k)
|
tag = clean(k)
|
||||||
|
|
||||||
if tag in temp:
|
if tag in temp:
|
||||||
if v > temp[tag]:
|
if v > temp[tag]:
|
||||||
temp[tag] = v
|
temp[tag] = v
|
||||||
else:
|
else:
|
||||||
temp[tag] = v
|
temp[tag] = v
|
||||||
|
|
||||||
tags = sorted(temp, key=temp.get, reverse=True)
|
tags = sorted(temp, key=temp.get, reverse=True)
|
||||||
|
|
||||||
print(json.dumps(tags, indent=4))
|
print(json.dumps(tags, indent=4))
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
TAGS_PYTHON=/home/tom5079/.virtualenvs/tags/bin/python
|
TAGS_PYTHON=
|
||||||
TAGS_FOLDER=/mnt/d/Workspace/Pupil-gh-pages
|
TAGS_FOLDER=
|
||||||
|
|
||||||
$TAGS_PYTHON -m pip install -r $TAGS_FOLDER/scripts/requirements.txt > NUL
|
$TAGS_PYTHON -m pip install -r $TAGS_FOLDER/scripts/requirements.txt > NUL
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user