from urllib.parse import quote_plus

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

import UtilPack as util

# Module-level result list. Nothing in this file writes to it; it is kept
# because code outside this file may reference it.
listResult = []


def GetSearchResult(searchWord):
    """Load the search page for *searchWord* in Chrome and print its galleries.

    The page is opened with Selenium, the function waits up to 30 seconds
    for an element with class ``dp`` to be present, and the resulting page
    source is handed to ``parseMangaInfos``.

    Args:
        searchWord: Text to search for. When ``util.IsEmptyStr`` reports it
            as empty, the site's front page is loaded instead.

    Returns:
        None. On a wait timeout a debug message is emitted and nothing is
        parsed.
    """
    url = getSiteUrl(searchWord)
    util.DbgOut("EHentai : " + url)

    driver = webdriver.Chrome()
    try:
        driver.get(url)

        # Wait until the page has loaded.
        try:
            WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'dp'))
            )
        except TimeoutException:
            util.DbgOut("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
            return

        strContent = driver.page_source
    finally:
        # Always release the browser, even when navigation itself raises.
        driver.quit()

    parseMangaInfos(strContent)


def getSiteUrl(searchWord):
    """Return the site URL, with an ``f_search`` query when a word is given.

    Args:
        searchWord: Raw (not yet URL-encoded) search text.

    Returns:
        The base URL, followed by ``?f_search=<encoded word>`` unless
        ``util.IsEmptyStr(searchWord)`` is truthy.
    """
    strRet = "https://e-hentai.org/"

    if not util.IsEmptyStr(searchWord):
        # Encode the word so spaces, '&', '#', '+' etc. cannot break the query.
        strRet += "?f_search=" + quote_plus(searchWord)

    return strRet


def parseMangaInfos(html_doc):
    """Parse a result page and print type, torrent and title info per row.

    Args:
        html_doc: HTML source of the result page.
    """
    # Build the BeautifulSoup object.
    soup = BeautifulSoup(html_doc, 'html.parser')

    gallery_table = soup.find('table', class_='itg gltc')
    if gallery_table is None:
        # Without the table there is nothing to iterate over.
        util.DbgOut("EHentai : gallery table not found")
        return

    for gl in gallery_table.find_all('tr'):
        gl1cParse(gl)
        gl2cParse(gl)
        gl3cParse(gl)
        print("\r\n")


def gl1cParse(soup_element):
    """Print the gallery type found in the row's ``gl1c glcat`` cell.

    Prints nothing when the cell or its inner ``div`` is missing.
    """
    element = soup_element.find('td', class_='gl1c glcat')
    if element is None:
        return

    man_type = element.find('div')
    if man_type is None:
        return

    print("type : " + man_type.text)


def gl2cParse(soup_element):
    """Print the torrent link found in the row's ``gl2c`` cell.

    Prints ``torrent : none`` when the cell exists but holds no usable link;
    prints nothing when the cell itself is missing.
    """
    element = soup_element.find('td', class_='gl2c')
    if element is None:
        return

    trt_btn = element.find('div', class_='gldown')
    trt_url = trt_btn.find('a') if trt_btn is not None else None
    url = trt_url.get('href') if trt_url is not None else None

    if url:
        print("torrent : " + url)
    else:
        print("torrent : none")


def gl3cParse(soup_element):
    """Print title, URL and tags found in the row's ``gl3c glname`` cell.

    Prints nothing when the cell, its link, the link's ``href`` or the
    ``glink`` title element is missing. Tags without a ``title`` attribute
    are skipped.
    """
    element = soup_element.find('td', class_='gl3c glname')
    if element is None:
        return

    element_url = element.find('a')
    element_title = element.find('div', class_='glink')
    if element_url is None or element_title is None:
        return

    man_url = element_url.get('href')
    if man_url is None:
        return

    print("title : " + element_title.text)
    print("Url : " + man_url)

    print("tags : ")
    for tag in element.find_all('div', class_='gt'):
        man_tag = tag.get('title')
        if man_tag is None:
            continue
        print("  " + man_tag)