Update DataClass.py, GetArc_Hitomi.py, and 2 more files...

작업한거 일단 커밋. 딱히 기능을 추가한건 없지만, 정리는 조금 했다. 정리 더 해야 하는데, 윈도우에서 작업하려고....커밋!!
This commit is contained in:
2025-01-28 17:45:03 +09:00
parent 6fe1cf8da0
commit 07ad09bb50
4 changed files with 105 additions and 41 deletions

View File

@@ -5,6 +5,8 @@ from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
import time
import UtilPack as util
import DataClass as info
@@ -13,15 +15,16 @@ import DataClass as info
def GetSearchResult(searchWord):
    """Open the hitomi.la page for *searchWord* in Chrome, wait for it to
    load, parse the gallery list from the page source and print each entry.

    searchWord -- search phrase or numeric gallery id (routed by getSiteUrl).
    Returns None; diagnostics go through util.DbgOut, results to stdout.
    """
    url = getSiteUrl(searchWord)
    util.DbgOut("Hitomi : " + url, True)
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # Wait for document.readyState instead of a specific element: the
        # site's class names (e.g. 'lillie') change and caused timeouts.
        try:
            WebDriverWait(driver, 10).until(
                lambda d: d.execute_script("return document.readyState") == "complete"
            )
        except TimeoutException:
            util.DbgOut("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
            return
        strContent = driver.page_source
        listRet = parseMangaInfos(strContent)
        for Idx, item in enumerate(listRet):
            print(f"{Idx} : {item}")
    finally:
        # BUG FIX: the original quit the driver only on the success path,
        # leaking the Chrome process when the page timed out.
        driver.quit()
def GetListSearchResult(list_ID):
    """Fetch and parse the hitomi.la gallery page for every entry of
    *list_ID*, reusing a single Chrome session for the whole batch.

    list_ID -- iterable of gallery ids / search words (routed by getSiteUrl).
    Returns None; errors are reported via util.DbgOut, never raised.
    """
    driver = webdriver.Chrome()
    try:
        for gallery_id in list_ID:  # renamed: 'id' shadowed the builtin
            url = getSiteUrl(gallery_id)
            util.DbgOut("Hitomi : " + url, True)
            driver.get(url)
            # Wait for the document itself; element-based waits proved
            # unreliable on this site.
            WebDriverWait(driver, 10).until(
                lambda d: d.execute_script("return document.readyState") == "complete"
            )
            # Grace period for scripts that populate the gallery list
            # after readyState reaches "complete".
            time.sleep(2)
            parseMangaInfos(driver.page_source)
    except Exception as e:
        # BUG FIX: the exception was passed as DbgOut's second (flag)
        # argument — every other call site passes a bool there — so the
        # error text was lost. Fold it into the message instead.
        util.DbgOut(f"Hitomi Loading Error : {e}", True)
    finally:
        driver.quit()
#
def getSiteUrl(searchWord):
    """Build the hitomi.la URL for *searchWord*.

    Empty input   -> the site root.
    All digits    -> a direct gallery page (galleries/<id>.html).
    Anything else -> a search page (search.html?<word>).
    """
    # NOTE: the diff residue contained both the old concatenation branch and
    # the new f-string branch; applied literally the suffix was appended
    # twice. Only the new-side f-string version is kept here.
    strRet = "https://hitomi.la/"
    if not util.IsEmptyStr(searchWord):
        if searchWord.isdigit():
            strRet = f"{strRet}galleries/{searchWord}.html"
        else:
            strRet = f"{strRet}search.html?{searchWord}"
    return strRet
@@ -57,8 +88,8 @@ def parseMangaInfos(html_doc):
for element in gallery_elements:
listDJ = djParse(element)
listDJs.extend(listDJ)
print(len(listDJs))
return listDJs
def djParse(soup_element):
@@ -83,8 +114,8 @@ def djTitleParse(input_element):
a_tag = element.find('a')
url = a_tag.get('href')
util.DbgOut("title : " + title)
util.DbgOut("URl : " + url)
#util.DbgOut("title : " + title)
#util.DbgOut("URl : " + url)
return info.CBZInfo(title, url)
@@ -127,7 +158,7 @@ def djDescParse(input_element, retPtr):
outMsg += f" {tag_name} {tag_url}\r\n"
util.DbgOut(outMsg)
#util.DbgOut(outMsg)
#
if "Series" == tds[0]: