import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup

import UtilPack as util
import DataClass as info


class GetArc_Hitomi:
    """Fetch hitomi.la pages with a Selenium-driven Chrome and parse them.

    Pages are loaded in a real browser, the rendered HTML is handed to
    BeautifulSoup, and every ``dj`` entry found inside a ``gallery-content``
    container is converted into an ``info.CBZInfo``.
    """

    m_strBaseURL = "https://hitomi.la/"
    # Class-level list shared by all instances; not referenced by any method
    # visible in this file.
    m_listTagsTemp: list[info.TagInfo] = []

    def __init__(self):
        pass

    @staticmethod
    def _waitForPageLoad(driver, nTimeout: int = 10) -> None:
        """Block until ``document.readyState`` reports "complete".

        Args:
            driver: The Selenium WebDriver whose current page is polled.
            nTimeout: Maximum number of seconds to wait.

        Raises:
            TimeoutException: If the page is not complete within nTimeout.
        """
        WebDriverWait(driver, nTimeout).until(
            lambda d: d.execute_script("return document.readyState") == "complete"
        )

    def GetSearchResult(self, strWord: str, bSaveHTML: bool = False):
        """Load one search or gallery page and print the parsed entries.

        A purely numeric strWord is treated as a gallery ID; anything else is
        used as a search term.

        Args:
            strWord: Gallery ID (digits only) or search term.
            bSaveHTML: When true, the fetched page source is also written to
                ``<strWord>_result.html`` in the current directory.

        Returns:
            None. Results are reported through ``util.DbgOut``.
        """
        if util.IsEmptyStr(strWord):
            util.DbgOut("Error : SearchWord is empty", True)
            return

        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscripts that int() cannot convert.
        if strWord.isdecimal():
            strURL = self.getSiteUrlForGallery(int(strWord))
        else:
            strURL = self.getSiteUrlForSearch(strWord)
        util.DbgOut(f"Hitomi : {strURL}", True)

        driver = webdriver.Chrome()
        try:
            driver.get(strURL)
            # Wait until the web page has finished loading.
            self._waitForPageLoad(driver)
            strContent = driver.page_source
        except TimeoutException:
            util.DbgOut("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.", True)
            return
        finally:
            # Always release the browser, including when get() or page_source
            # raises something other than TimeoutException.
            driver.quit()

        if bSaveHTML:
            strFileName = f"{strWord}_result.html"
            with open(strFileName, "w", encoding="utf-8") as file:
                file.write(strContent)
            util.DbgOut(f"HTML content saved to {strFileName}", True)

        listRet = self.parseMangaInfos(strContent)
        for nIdx, cbzInfo in enumerate(listRet):
            util.DbgOut(f"{nIdx} : {cbzInfo}", True)

    def GetListSearchResult(self, listID: list[int], bSave: bool = False):
        """Load several gallery pages in one browser session and print them.

        Args:
            listID: Gallery IDs to fetch, in order.
            bSave: When true, the string form of every parsed entry for a
                gallery is written, one per line, to ``<ID>.txt``.

        Returns:
            None. Any exception raised while loading or parsing is logged and
            stops processing of the remaining IDs; a failed file write is
            logged and processing continues with the next ID.
        """
        driver = webdriver.Chrome()
        try:
            for nID in listID:
                strURL = self.getSiteUrlForGallery(nID)
                util.DbgOut(f"Hitomi : {strURL}", True)

                driver.get(strURL)
                # Wait until the web page has finished loading.
                self._waitForPageLoad(driver)
                # NOTE(review): presumably gives script-rendered content time
                # to appear after readyState is "complete" - confirm.
                time.sleep(2)

                listRet = self.parseMangaInfos(driver.page_source)
                for nIdx, cbzInfo in enumerate(listRet):
                    print(f"{nIdx} : {cbzInfo}")

                if not bSave:
                    continue

                strFileName = f"{nID}.txt"
                try:
                    # Open once per gallery so later entries do not overwrite
                    # earlier ones; UTF-8 because titles may be non-ASCII.
                    with open(strFileName, "w", encoding="utf-8") as file:
                        file.writelines(f"{cbzInfo}\n" for cbzInfo in listRet)
                except OSError:
                    util.DbgOut(
                        f"Error: Could not write to the file at {strFileName}.",
                        True,
                    )
        except Exception as e:
            util.DbgOut(f"Hitomi Loading Error : {e}", True)
        finally:
            driver.quit()

    def getSiteUrlForSearch(self, searchWord: str) -> str:
        """Return the search-page URL for searchWord (inserted verbatim)."""
        return f"{self.m_strBaseURL}search.html?{searchWord}"

    def getSiteUrlForGallery(self, nHitomiID: int) -> str:
        """Return the gallery-page URL for the given gallery ID."""
        return f"{self.m_strBaseURL}galleries/{nHitomiID}.html"

    def parseMangaInfos(self, html_doc: str) -> list[info.CBZInfo]:
        """Parse an HTML document into a flat list of entries.

        Args:
            html_doc: Full HTML source of a page.

        Returns:
            One ``info.CBZInfo`` per ``dj`` element found inside any element
            with class ``gallery-content``; empty when there are none.
        """
        soup = BeautifulSoup(html_doc, 'html.parser')

        listDJs: list[info.CBZInfo] = []
        for element in soup.find_all(class_='gallery-content'):
            listDJs.extend(self.djParse(element))
        return listDJs

    def djParse(self, soup_element) -> list[info.CBZInfo]:
        """Build a ``CBZInfo`` for every ``dj`` element below soup_element."""
        listInfos: list[info.CBZInfo] = []
        for child in soup_element.find_all(class_='dj'):
            # Named cbzInfo (not "info") so the imported module is not shadowed.
            cbzInfo = self.djTitleParse(child)
            self.djArtistParse(child, cbzInfo)
            self.djDescParse(child, cbzInfo)
            listInfos.append(cbzInfo)
        return listInfos

    def djTitleParse(self, input_element):
        """Create a ``CBZInfo`` from the entry's ``h1.lillie`` title element.

        Returns:
            ``info.CBZInfo(title, url)`` where url is the ``href`` of the first
            link inside the title. When the title element is missing both
            values are empty strings; when only the link is missing the url is
            an empty string.
        """
        element = input_element.find('h1', class_='lillie')
        if element is None:
            util.DbgOut("Warning : title element not found")
            return info.CBZInfo("", "")

        strTitle: str = element.text
        a_tag = element.find('a')
        strURL = a_tag.get('href') if a_tag is not None else ""
        return info.CBZInfo(strTitle, strURL)

    def djArtistParse(self, input_element, retPtr):
        """Add the text of every link in ``div.artist-list`` to retPtr.

        Args:
            input_element: The ``dj`` element being parsed.
            retPtr: Object updated in place through ``AddArtist``.
        """
        element = input_element.find('div', class_='artist-list')
        if element is None:
            # Nothing to add when the entry has no artist list.
            return

        for tag in element.find_all('a'):
            retPtr.AddArtist(tag.text)

    def djDescParse(self, input_element, retPtr):
        """Read the ``table.dj-desc`` rows of an entry into retPtr.

        Every link in the second cell of a two-cell row is registered through
        ``retPtr.AddTag``. For rows labelled "Series", "Type" or "Language"
        the name of that row's last tag is also stored on retPtr.

        Args:
            input_element: The ``dj`` element being parsed.
            retPtr: Object updated in place.

        Returns:
            A list of ``info.TagInfo(name, url)`` for every link seen, in
            document order; empty when the table is missing.
        """
        # Row label -> attribute assigned on retPtr.
        # NOTE(review): "serires" is kept exactly as the original spelled it;
        # confirm the attribute name against DataClass.CBZInfo.
        dictLabelToAttr = {
            "Series": "serires",
            "Type": "type",
            "Language": "language",
        }

        listTags = []
        element = input_element.find('table', class_='dj-desc')
        if element is None:
            util.DbgOut("Warning : dj-desc table not found")
            return listTags

        for row in element.find_all('tr'):
            tds = row.find_all('td')
            if 2 != len(tds):
                util.DbgOut("Warning : td get failed")
                continue

            listRowTags = []
            for tag in tds[1].find_all('a'):
                tag_name = tag.text
                retPtr.AddTag(tag_name)
                listRowTags.append(info.TagInfo(tag_name, tag.get('href')))
            listTags.extend(listRowTags)

            # Compare the cell's text, not the Tag object itself (a Tag never
            # equals a str), and only use tags that belong to this row.
            strAttr = dictLabelToAttr.get(tds[0].text.strip())
            if strAttr is not None and listRowTags:
                setattr(retPtr, strAttr, listRowTags[-1].name)

        return listTags


def main():
    """Manual smoke test: fetch one gallery by ID and save its HTML."""
    hitomi = GetArc_Hitomi()

    # Search by keyword:
    #   hitomi.GetSearchResult("test")

    # Search by gallery ID.
    hitomi.GetSearchResult("11107", True)

    # Search by a list of gallery IDs:
    #   hitomi.GetListSearchResult([1234567, 2345678, 3456789], True)


if __name__ == '__main__':
    main()