# CollectMangaInfo/GetArc_Hitomi.py
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
import time
import UtilPack as util
import DataClass as info
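# Scrapes gallery metadata from hitomi.la: Selenium renders the page,
# BeautifulSoup parses the gallery blocks into DataClass objects.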
#
def GetSearchResult(searchWord):
    url = getSiteUrl(searchWord)
    util.DbgOut("Hitomi : " + url, True)
    driver = webdriver.Chrome()
    driver.get(url)
    # Wait until the web page has finished loading
    try:
        WebDriverWait(driver, 10).until(
            #EC.presence_of_element_located((By.CLASS_NAME, 'lillie'))
            lambda d: d.execute_script("return document.readyState") == "complete"
        )
    except TimeoutException:
        util.DbgOut("The page did not load or the element could not be found.", True)
        driver.quit()
        return
    strContent = driver.page_source
    listRet = parseMangaInfos(strContent)
    for Idx in range(len(listRet)):
        print(f"{Idx} : {listRet[Idx]}")
    driver.quit()
def GetListSearchResult(list_ID):
    driver = webdriver.Chrome()
    try:
        for id in list_ID:
            url = getSiteUrl(id)
            util.DbgOut("Hitomi : " + url, True)
            driver.get(url)
            # Wait until the web page has finished loading
            WebDriverWait(driver, 10).until(
                lambda d: d.execute_script("return document.readyState") == "complete"
            )
            time.sleep(2)
            strContent = driver.page_source
            listRet = parseMangaInfos(strContent)
            try:
                # Open the file once per ID; opening inside the index loop
                # with mode 'w' would truncate it on every iteration.
                with open(f"{id}.txt", 'w') as file:
                    for Idx in range(len(listRet)):
                        print(f"{Idx} : {listRet[Idx]}")
                        file.write(str(listRet[Idx]) + "\n")
            except IOError:
                util.DbgOut(f"Error: Could not write to the file at {id}.txt.", True)
    except Exception as e:
        util.DbgOut(f"Hitomi Loading Error : {e}", True)
    finally:
        driver.quit()
def getSiteUrl(searchWord):
    strRet = "https://hitomi.la/"
    if not searchWord.isdigit():
        strRet = f"{strRet}search.html?{searchWord}"
    else:
        strRet = f"{strRet}galleries/{searchWord}.html"
    return strRet
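# Example URLs this produces (the search word and gallery ID below are
# hypothetical placeholders):
#   getSiteUrl("touhou") -> https://hitomi.la/search.html?touhou
#   getSiteUrl("123456") -> https://hitomi.la/galleries/123456.html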
#
def parseMangaInfos(html_doc):
    # Create a BeautifulSoup object
    soup = BeautifulSoup(html_doc, 'html.parser')
    gallery_elements = soup.find_all(class_='gallery-content')
    listDJs = []
    for element in gallery_elements:
        listDJ = djParse(element)
        listDJs.extend(listDJ)
    return listDJs
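# Each 'gallery-content' block holds one or more 'dj' entries; a 'dj' entry
# carries an h1.lillie title link, an artist-list div, and a dj-desc table,
# which the three parsers below pick apart.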
def djParse(soup_element):
    childs = soup_element.find_all(class_='dj')
    listInfos = []
    for child in childs:
        # 'djInfo' avoids shadowing the DataClass module imported as 'info'
        djInfo = djTitleParse(child)
        djArtistParse(child, djInfo)
        djDescParse(child, djInfo)
        listInfos.append(djInfo)
    return listInfos
def djTitleParse(input_element):
    element = input_element.find('h1', class_='lillie')
    title = element.text
    a_tag = element.find('a')
    url = a_tag.get('href')
    #util.DbgOut("title : " + title)
    #util.DbgOut("URL : " + url)
    return info.CBZInfo(title, url)
def djArtistParse(input_element, retPtr):
    element = input_element.find('div', class_='artist-list')
    a_tags = element.find_all('a')
    listArtists = []
    for tag in a_tags:
        artist = tag.text
        a_url = tag.get('href')
        retPtr.AddArtist(artist)
        listArtists.append(info.TagInfo(artist, a_url))
    return listArtists
def djDescParse(input_element, retPtr):
    element = input_element.find('table', class_='dj-desc')
    tb_rows = element.find_all('tr')
    listTags = []
    for row in tb_rows:
        tds = row.find_all('td')
        if len(tds) != 2:
            util.DbgOut("Warning : td get failed")
            continue
        outMsg = f"{tds[0].text} : \r\n"
        a_tags = tds[1].find_all('a')
        rowTags = []
        for tag in a_tags:
            tag_name = tag.text
            tag_url = tag.get('href')
            retPtr.AddTag(tag_name)
            rowTags.append(info.TagInfo(tag_name, tag_url))
            outMsg += f"    {tag_name} {tag_url}\r\n"
        listTags.extend(rowTags)
        #util.DbgOut(outMsg)
        #
        # Compare the cell's text (not the Tag object) against the row label,
        # and take the last tag of *this* row, not of the accumulated list.
        if rowTags:
            label = tds[0].text.strip()
            if label == "Series":
                retPtr.series = rowTags[-1].name
            elif label == "Type":
                retPtr.type = rowTags[-1].name
            elif label == "Language":
                retPtr.language = rowTags[-1].name
    return listTags
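#
# A minimal usage sketch, assuming this file is run directly and that
# UtilPack and DataClass are importable. The gallery IDs below are
# hypothetical placeholders.
if __name__ == "__main__":
    GetSearchResult("123456")                  # single gallery ID -> galleries/<id>.html
    GetListSearchResult(["123456", "234567"])  # several IDs, each dumped to <id>.txt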