175 lines
4.4 KiB
Python
175 lines
4.4 KiB
Python
from selenium import webdriver
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
from selenium.common.exceptions import TimeoutException
|
|
from bs4 import BeautifulSoup
|
|
|
|
import time
|
|
|
|
import UtilPack as util
|
|
import DataClass as info
|
|
|
|
|
|
#
|
|
def GetSearchResult(searchWord):
    """Open the Hitomi page for ``searchWord`` in Chrome and print each parsed entry.

    The URL is built by ``getSiteUrl``. Once ``document.readyState`` reports
    ``"complete"`` the page source is passed to ``parseMangaInfos`` and every
    returned item is printed as ``"<index> : <item>"``.

    Args:
        searchWord: Search expression or gallery ID, forwarded to ``getSiteUrl``.

    Returns:
        None. If the page does not finish loading within 10 seconds a message
        is logged and nothing is printed.
    """
    url = getSiteUrl(searchWord)
    util.DbgOut("Hitomi : " + url, True)

    driver = webdriver.Chrome()
    # try/finally guarantees the browser is shut down on every exit path,
    # including exceptions raised while loading or parsing the page.
    try:
        driver.get(url)

        # Wait until the web page has finished loading.
        try:
            WebDriverWait(driver, 10).until(
                lambda d: d.execute_script("return document.readyState") == "complete"
            )
        except TimeoutException:
            util.DbgOut("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
            return

        for idx, item in enumerate(parseMangaInfos(driver.page_source)):
            print(f"{idx} : {item}")
    finally:
        driver.quit()
|
|
|
|
def GetListSearchResult(list_ID):
    """Load the Hitomi page for every entry of ``list_ID`` and print the parsed items.

    A single Chrome instance is reused for all entries. For each entry the URL
    is built by ``getSiteUrl``, the page is loaded, and every item returned by
    ``parseMangaInfos`` is printed as ``"<index> : <item>"``.

    Args:
        list_ID: Iterable of search words / gallery IDs, each forwarded to
            ``getSiteUrl``.

    Returns:
        None. Any exception raised while processing is logged and ends the
        loop; the remaining entries are not processed.
    """
    driver = webdriver.Chrome()

    try:
        for gallery_id in list_ID:
            url = getSiteUrl(gallery_id)
            util.DbgOut("Hitomi : " + url, True)

            driver.get(url)

            # Wait until the web page has finished loading.
            WebDriverWait(driver, 10).until(
                lambda d: d.execute_script("return document.readyState") == "complete"
            )

            # Extra pause after readyState is "complete"; presumably gives
            # client-side scripts time to render the content — TODO confirm.
            time.sleep(2)

            for idx, item in enumerate(parseMangaInfos(driver.page_source)):
                print(f"{idx} : {item}")
    except Exception as e:
        # The exception text goes into the message itself. The second argument
        # is True, as in the other DbgOut calls of this file; the original
        # passed the exception object in that position.
        # NOTE(review): assumes DbgOut's second parameter is a flag — confirm.
        util.DbgOut(f"Hitomi Loading Error : {e}", True)
    finally:
        driver.quit()
|
|
#
|
|
def getSiteUrl(searchWord):
    """Build the Hitomi URL for a search word or a gallery ID.

    Args:
        searchWord: A string, or an ``int`` gallery ID. A value consisting
            only of digits is treated as a gallery ID; anything else
            (including the empty string) is treated as a search expression.

    Returns:
        ``https://hitomi.la/galleries/<id>.html`` for an all-digit value,
        otherwise ``https://hitomi.la/search.html?<searchWord>``.
    """
    base_url = "https://hitomi.la/"

    # Integer IDs are accepted as well; they behave like their string form.
    word = str(searchWord) if isinstance(searchWord, int) else searchWord

    if word.isdigit():
        return f"{base_url}galleries/{word}.html"
    return f"{base_url}search.html?{word}"
|
|
|
|
#
|
|
def parseMangaInfos(html_doc):
    """Parse ``html_doc`` and return the entries found in its gallery sections.

    Args:
        html_doc: HTML markup of the page.

    Returns:
        A flat list holding, in document order, everything ``djParse``
        returns for each element with class ``gallery-content``.
    """
    document = BeautifulSoup(html_doc, 'html.parser')
    return [
        entry
        for gallery in document.find_all(class_='gallery-content')
        for entry in djParse(gallery)
    ]
|
|
|
|
|
|
def djParse(soup_element):
    """Build one info object for every ``dj`` element inside ``soup_element``.

    Args:
        soup_element: BeautifulSoup element that is searched for descendants
            with class ``dj``.

    Returns:
        List of the objects created by ``djTitleParse``, one per ``dj``
        element, after ``djArtistParse`` and ``djDescParse`` have been
        applied to each of them.
    """
    entries = []
    for dj_node in soup_element.find_all(class_='dj'):
        # Named ``entry`` rather than ``info`` so the module alias ``info``
        # is not shadowed inside this function.
        entry = djTitleParse(dj_node)

        # Both helpers update ``entry`` in place (AddArtist / AddTag); the
        # lists they return are not used here.
        djArtistParse(dj_node, entry)
        djDescParse(dj_node, entry)

        entries.append(entry)

    return entries
|
|
|
|
|
|
def djTitleParse(input_element):
    """Create a ``CBZInfo`` from the title heading of ``input_element``.

    Args:
        input_element: BeautifulSoup element containing an ``h1`` with class
            ``lillie`` that wraps a link.

    Returns:
        ``info.CBZInfo`` built from the heading text and the ``href`` of the
        first ``<a>`` inside the heading.
    """
    heading = input_element.find('h1', class_='lillie')
    title_text = heading.text
    href = heading.find('a').get('href')
    return info.CBZInfo(title_text, href)
|
|
|
|
|
|
def djArtistParse(input_element, retPtr):
    """Collect the artists listed in ``input_element`` and register them on ``retPtr``.

    Args:
        input_element: BeautifulSoup element searched for a ``div`` with
            class ``artist-list``.
        retPtr: Object receiving each artist name through ``AddArtist``.

    Returns:
        List of ``info.TagInfo`` objects, one per link in the artist list,
        built from the link text and its ``href``. Empty when the artist
        list is missing or contains no links.
    """
    container = input_element.find('div', class_='artist-list')
    if container is None:
        # find() returns None when the div is absent; treat that as
        # "no artists" instead of failing with AttributeError.
        util.DbgOut("Warning : artist-list get failed")
        return []

    listArtists = []
    for link in container.find_all('a'):
        artist = link.text
        retPtr.AddArtist(artist)
        listArtists.append(info.TagInfo(artist, link.get('href')))

    return listArtists
|
|
|
|
|
|
def djDescParse(input_element, retPtr):
    """Parse the ``dj-desc`` table of ``input_element`` and record its tags on ``retPtr``.

    Every row is expected to have exactly two cells: a label cell and a cell
    containing links. Each link text is registered through ``retPtr.AddTag``.
    For rows labelled ``Series``, ``Type`` or ``Language`` the text of the
    row's last link is additionally stored on ``retPtr``.

    Args:
        input_element: BeautifulSoup element searched for a ``table`` with
            class ``dj-desc``.
        retPtr: Object receiving the tags and the series/type/language values.

    Returns:
        List of ``info.TagInfo`` objects for all links found in the table, in
        document order. Empty when the table is missing.
    """
    table = input_element.find('table', class_='dj-desc')
    if table is None:
        # find() returns None when the table is absent; report it and return
        # no tags instead of failing with AttributeError.
        util.DbgOut("Warning : dj-desc get failed")
        return []

    listTags = []
    for row in table.find_all('tr'):
        tds = row.find_all('td')
        if 2 != len(tds):
            util.DbgOut("Warning : td get failed")
            continue

        # Compare against the cell's text: comparing a str with the cell
        # element itself is never equal.
        label = tds[0].text.strip()

        # Names collected for this row only, so a row without links cannot
        # pick up a tag that belongs to an earlier row.
        row_names = []
        for tag in tds[1].find_all('a'):
            tag_name = tag.text
            retPtr.AddTag(tag_name)
            listTags.append(info.TagInfo(tag_name, tag.get('href')))
            row_names.append(tag_name)

        if not row_names:
            continue

        last_name = row_names[-1]
        if label == "Series":
            # NOTE(review): attribute name kept exactly as in the original;
            # "serires" looks like a typo of "series" — verify against the
            # class definition in DataClass before renaming.
            retPtr.serires = last_name
        elif label == "Type":
            retPtr.type = last_name
        elif label == "Language":
            retPtr.language = last_name

    return listTags
|
|
|