"""Scrape gallery listings from hitomi.la with Selenium and BeautifulSoup.

The public entry points open a Chrome session, load a search or gallery page,
parse every ``dj`` entry found in the rendered HTML into ``DataClass.CBZInfo``
objects, and print them.
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
import time

import UtilPack as util
import DataClass as info

# Maximum number of seconds to wait for document.readyState == "complete".
_PAGE_LOAD_TIMEOUT_SEC = 10


def _waitForPageLoad(driver, timeout=_PAGE_LOAD_TIMEOUT_SEC):
    """Block until the browser reports the document as fully loaded.

    Raises:
        TimeoutException: if the page is not "complete" within *timeout* seconds.
    """
    WebDriverWait(driver, timeout).until(
        lambda d: d.execute_script("return document.readyState") == "complete"
    )


def _printResults(listRet):
    """Print each parsed entry prefixed with its index."""
    for idx, item in enumerate(listRet):
        print(f"{idx} : {item}")


def GetSearchResult(searchWord):
    """Load the page for *searchWord*, parse it and print the entries found.

    Args:
        searchWord: a search query, or a numeric gallery ID (see getSiteUrl).

    Returns:
        None. Results are printed; a load timeout is logged and swallowed.
    """
    url = getSiteUrl(searchWord)
    util.DbgOut("Hitomi : " + url, True)

    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # Wait until the web page has finished loading.
        try:
            _waitForPageLoad(driver)
        except TimeoutException:
            util.DbgOut("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
            return
        _printResults(parseMangaInfos(driver.page_source))
    finally:
        # Always release the browser, even if parsing raises.
        driver.quit()


def GetListSearchResult(list_ID):
    """Load, parse and print the page of every item in *list_ID*.

    A single browser session is reused for all items. Any exception stops
    the loop, is logged, and the browser is closed.

    Args:
        list_ID: iterable of search words / gallery IDs (see getSiteUrl).
    """
    driver = webdriver.Chrome()
    try:
        for gallery_id in list_ID:
            url = getSiteUrl(gallery_id)
            util.DbgOut("Hitomi : " + url, True)

            driver.get(url)
            # Wait until the web page has finished loading.
            _waitForPageLoad(driver)
            # NOTE(review): presumably gives page scripts time to render the
            # gallery list after readyState is "complete" - confirm.
            time.sleep(2)

            _printResults(parseMangaInfos(driver.page_source))
    except Exception as e:
        # The exception text goes into the message itself; the second
        # argument stays truthy, as it was when the exception object was
        # passed in that position.
        # NOTE(review): assumes DbgOut's second parameter is a flag, as in
        # the other calls in this file - confirm against UtilPack.
        util.DbgOut(f"Hitomi Loading Error : {e}", True)
    finally:
        driver.quit()


def getSiteUrl(searchWord):
    """Build the hitomi.la URL for a gallery ID or a search query.

    Args:
        searchWord: all-digit values (str or int) map to a gallery page,
            anything else to a search page.

    Returns:
        The URL as a string.
    """
    word = str(searchWord)  # accept integer IDs as well as strings
    base = "https://hitomi.la/"
    if word.isdigit():
        return f"{base}galleries/{word}.html"
    return f"{base}search.html?{word}"


def parseMangaInfos(html_doc):
    """Parse *html_doc* and return the entries of all 'gallery-content' nodes.

    Returns:
        A flat list of the objects produced by djParse.
    """
    soup = BeautifulSoup(html_doc, 'html.parser')

    listDJs = []
    for element in soup.find_all(class_='gallery-content'):
        listDJs.extend(djParse(element))
    return listDJs


def djParse(soup_element):
    """Return one CBZInfo per element with class 'dj' under *soup_element*.

    Each info object is created from the title and then filled in place
    with artists and description tags.
    """
    listInfos = []
    for child in soup_element.find_all(class_='dj'):
        cbzInfo = djTitleParse(child)
        djArtistParse(child, cbzInfo)
        djDescParse(child, cbzInfo)
        listInfos.append(cbzInfo)
    return listInfos


def djTitleParse(input_element):
    """Create a CBZInfo from the <h1 class="lillie"> title of an entry.

    Returns:
        info.CBZInfo built from the heading text and the href of its
        first <a>.

    Raises:
        AttributeError: if the heading or its link is missing.
    """
    element = input_element.find('h1', class_='lillie')
    title = element.text

    a_tag = element.find('a')
    url = a_tag.get('href')

    return info.CBZInfo(title, url)


def djArtistParse(input_element, retPtr):
    """Collect the artists listed in <div class="artist-list">.

    Every artist name is also registered on *retPtr* via AddArtist.

    Returns:
        List of info.TagInfo(name, href); empty if the div is missing.
    """
    element = input_element.find('div', class_='artist-list')
    if element is None:
        return []

    listArtists = []
    for tag in element.find_all('a'):
        artist = tag.text
        retPtr.AddArtist(artist)
        listArtists.append(info.TagInfo(artist, tag.get('href')))
    return listArtists


def djDescParse(input_element, retPtr):
    """Collect the tags listed in <table class="dj-desc">.

    Every linked name is registered on *retPtr* via AddTag. For rows
    labelled "Series", "Type" or "Language", the last linked name of that
    row is additionally stored on the matching attribute of *retPtr*.

    Returns:
        List of info.TagInfo(name, href) over all rows; empty if the table
        is missing.
    """
    element = input_element.find('table', class_='dj-desc')
    if element is None:
        return []

    listTags = []
    for row in element.find_all('tr'):
        tds = row.find_all('td')
        if len(tds) != 2:
            util.DbgOut("Warning : td get failed")
            continue

        # Compare the cell's text: the Tag object never equals a str.
        rowLabel = tds[0].text.strip()

        lastName = None
        for tag in tds[1].find_all('a'):
            tag_name = tag.text
            retPtr.AddTag(tag_name)
            listTags.append(info.TagInfo(tag_name, tag.get('href')))
            lastName = tag_name

        # Rows without links carry no value to record.
        if lastName is None:
            continue

        if rowLabel == "Series":
            # NOTE(review): "serires" looks like a typo of "series"; kept
            # as-is because the CBZInfo definition is not visible here.
            retPtr.serires = lastName
        elif rowLabel == "Type":
            retPtr.type = lastName
        elif rowLabel == "Language":
            retPtr.language = lastName

    return listTags