Update DataClass.py, GetArc_Ehentai.py, and 7 more files...
데이터 클래스 정의, 퍼필 다운로더 json 파서...
This commit is contained in:
@@ -6,9 +6,10 @@ from selenium.common.exceptions import TimeoutException
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
import UtilPack as util
|
||||
import DataClass as info
|
||||
|
||||
listResult = []
|
||||
|
||||
|
||||
#
|
||||
def GetSearchResult(searchWord):
|
||||
url = getSiteUrl(searchWord)
|
||||
|
||||
@@ -39,70 +40,104 @@ def getSiteUrl(searchWord):
|
||||
strRet = "https://hitomi.la/"
|
||||
|
||||
if False == util.IsEmptyStr(searchWord):
|
||||
strRet = strRet + "search.html?" + searchWord
|
||||
if False == searchWord.isdigit():
|
||||
strRet = strRet + "search.html?" + searchWord
|
||||
else:
|
||||
strRet = strRet + "galleries/" + searchWord + ".html"
|
||||
|
||||
return strRet
|
||||
|
||||
|
||||
#
|
||||
def parseMangaInfos(html_doc):
    """Collect the entries found in every ``gallery-content`` element of a page.

    Each element with class ``gallery-content`` is handed to ``djParse`` and
    the per-gallery results are merged into a single list. The number of
    collected entries is printed.

    Args:
        html_doc: HTML markup, in any form accepted by ``BeautifulSoup``.

    Returns:
        The merged list of objects produced by ``djParse``.
    """
    # Create the BeautifulSoup object.
    soup = BeautifulSoup(html_doc, 'html.parser')
    gallery_elements = soup.find_all(class_='gallery-content')

    listDJs = []
    for element in gallery_elements:
        # Parse each gallery exactly once; a second, discarded call would
        # repeat all of the helpers' side effects (debug output, prints).
        listDJs.extend(djParse(element))

    print(len(listDJs))

    # Hand the collected entries back so callers can actually use them.
    return listDJs
|
||||
|
||||
|
||||
def djParse(soup_element):
    """Build one info object per ``dj`` entry found under *soup_element*.

    For each descendant with class ``dj``, ``djTitleParse`` creates the info
    object; ``djArtistParse`` and ``djDescParse`` are then called with that
    object as their second argument so they can record what they find on it.

    Args:
        soup_element: Parsed element supporting ``find_all(class_=...)``.

    Returns:
        List of the objects returned by ``djTitleParse``, one per entry, in
        document order. Empty when no ``dj`` entry exists.
    """
    listInfos = []
    for child in soup_element.find_all(class_='dj'):
        # Named dj_info (not info) so the module alias for DataClass is not
        # shadowed inside this function.
        dj_info = djTitleParse(child)

        print("\r\n")

        # The helpers' returned tag lists are not needed here; only their
        # effect on dj_info matters.
        djArtistParse(child, dj_info)
        djDescParse(child, dj_info)

        listInfos.append(dj_info)

    return listInfos
|
||||
|
||||
|
||||
def djTitleParse(input_element):
    """Create the info object for one entry from its title heading.

    Reads the ``h1`` with class ``lillie`` inside *input_element*: the heading
    text becomes the title and the ``href`` of its first link becomes the URL.
    Both values are written to the debug output.

    Args:
        input_element: Parsed element of a single entry.

    Returns:
        ``info.CBZInfo(title, url)``.
    """
    element = input_element.find('h1', class_='lillie')
    title = element.text

    a_tag = element.find('a')
    url = a_tag.get('href')

    util.DbgOut("title : " + title)
    util.DbgOut("URl : " + url)

    return info.CBZInfo(title, url)
|
||||
|
||||
|
||||
def djArtistParse(input_element, retPtr):
    """Collect the artist links of one entry and register them on *retPtr*.

    Args:
        input_element: Parsed element of a single entry.
        retPtr: Object that receives every artist name via ``AddArtist``.

    Returns:
        List of ``info.TagInfo(artist, url)``, one per link found inside the
        ``div`` with class ``artist-list``. Empty when that ``div`` is missing
        or contains no links.
    """
    element = input_element.find('div', class_='artist-list')
    if element is None:
        # No artist block on this entry: nothing to register.
        return []

    listArtists = []
    for tag in element.find_all('a'):
        artist = tag.text
        a_url = tag.get('href')
        # f-string formatting keeps the same output for normal links but does
        # not raise TypeError when a link has no href (a_url is None).
        print(f" {artist} {a_url}")
        retPtr.AddArtist(artist)
        listArtists.append(info.TagInfo(artist, a_url))

    return listArtists
|
||||
|
||||
def djDescParse(input_element, retPtr):
    """Read the description table of one entry and record its tags on *retPtr*.

    Every two-cell row of the ``table`` with class ``dj-desc`` is treated as a
    label cell followed by a cell of tag links. Each tag name is passed to
    ``retPtr.AddTag``. Rows labelled ``Series``, ``Type`` or ``Language``
    additionally set the matching attribute on *retPtr* from the last tag of
    that same row. Rows that do not have exactly two cells are reported to the
    debug output and skipped.

    Args:
        input_element: Parsed element of a single entry.
        retPtr: Object that receives the tag names and the three attributes.

    Returns:
        List of ``info.TagInfo(name, url)`` for all tags of all rows, in
        document order. Empty when the table is missing or has no rows.
    """
    element = input_element.find('table', class_='dj-desc')
    if element is None:
        # No description table on this entry: nothing to record.
        return []

    listTags = []
    for row in element.find_all('tr'):
        tds = row.find_all('td')
        if 2 != len(tds):
            util.DbgOut("Warning : td get failed")
            continue

        label = tds[0].text
        outMsg = f"{label} : \r\n"

        # Tags of this row only, so the attribute assignment below can never
        # pick up a tag that belongs to an earlier row.
        row_tags = []
        for tag in tds[1].find_all('a'):
            tag_name = tag.text
            tag_url = tag.get('href')

            retPtr.AddTag(tag_name)
            row_tags.append(info.TagInfo(tag_name, tag_url))
            outMsg += f" {tag_name} {tag_url}\r\n"

        util.DbgOut(outMsg)
        listTags.extend(row_tags)

        if not row_tags:
            # A labelled row without links has no value to assign.
            continue

        # Compare the cell's text, not the cell element itself: a string is
        # never equal to a parsed tag object, so the element comparison could
        # not match any row.
        key = label.strip()
        if "Series" == key:
            # NOTE(review): "serires" looks like a typo of "series"; the name
            # is kept because the attribute is declared outside this view -
            # confirm against DataClass.CBZInfo before renaming.
            retPtr.serires = row_tags[-1].name
        elif "Type" == key:
            retPtr.type = row_tags[-1].name
        elif "Language" == key:
            retPtr.language = row_tags[-1].name

    return listTags
|
||||
|
||||
|
||||
Reference in New Issue
Block a user