from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup


class MangaMetaInfo:
    """Value object holding metadata for a single manga entry.

    Extra positional arguments beyond the four named fields are
    collected into the ``tags`` tuple.
    """

    def __init__(self, title, url, lang, manType, *tags):
        self.title = title      # display title
        self.url = url          # detail-page URL
        self.lang = lang        # language label
        self.manType = manType  # series/type label
        self.tags = tags        # remaining args, stored as a tuple


class MangaInfo:
    """Scrapes a hitomi.la search-result page and prints parsed gallery info."""

    # NOTE(review): class-level mutable attribute — shared by ALL instances.
    # It is never written in the visible code; kept only for interface
    # compatibility. If it is ever appended to, move it into __init__.
    listResult = []

    def GetSearchResult(self, searchWord):
        """Open the search page for *searchWord*, wait for it to render,
        then parse and print the galleries found.

        Returns None. On timeout a message is printed and parsing is skipped.
        """
        url = self.getSiteUrl() + searchWord
        driver = webdriver.Chrome()
        try:
            driver.get(url)
            # Wait until at least one gallery title ('lillie') is present,
            # i.e. the JS-rendered result list has loaded.
            try:
                WebDriverWait(driver, 30).until(
                    EC.presence_of_element_located((By.CLASS_NAME, 'lillie'))
                )
            except TimeoutException:
                print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
                return
            strContent = driver.page_source
        finally:
            # Always release the browser, even if get()/wait/page_source
            # raises — the original leaked the driver on unexpected errors.
            driver.quit()
        self.parseMangaInfos(strContent)

    def getSiteUrl(self):
        """Return the base search URL; callers append the query word."""
        return "https://hitomi.la/search.html?"

    def parseMangaInfos(self, html_doc):
        """Parse every 'gallery-content' container in *html_doc*."""
        soup = BeautifulSoup(html_doc, 'html.parser')
        for element in soup.find_all(class_='gallery-content'):
            self.djParse(element)

    def djParse(self, soup_element):
        """Parse each 'dj' gallery card inside a gallery-content container."""
        for child in soup_element.find_all(class_='dj'):
            self.djtitleParse(child)
            self.artistlistParse(child)
            self.djDescParse(child)
            print("\r\n")

    def djtitleParse(self, soup_element):
        """Print the gallery title and its detail-page URL, if present."""
        element = soup_element.find('h1', class_='lillie')
        if element is None:
            # Missing title node — skip instead of raising AttributeError.
            return
        title = element.text
        a_tag = element.find('a')
        # href may be absent; fall back to "" to keep the print safe.
        a_url = a_tag.get('href') or "" if a_tag is not None else ""
        print("title : " + title)
        print("URl : " + a_url)

    def artistlistParse(self, soup_element):
        """Print every artist name and link found in the artist-list block."""
        element = soup_element.find('div', class_='artist-list')
        print("artists")
        if element is None:
            # No artist list on this card — nothing more to print.
            return
        for tag in element.find_all('a'):
            artist = tag.text
            a_url = tag.get('href') or ""
            print(" " + artist + " " + a_url)

    def djDescParse(self, soup_element):
        """Print each label/value row of the dj-desc table (type, language, tags)."""
        element = soup_element.find('table', class_='dj-desc')
        if element is None:
            # Description table missing — skip instead of crashing.
            return
        for row in element.find_all('tr'):
            tds = row.find_all('td')
            # Each well-formed row is exactly (label, value).
            if len(tds) != 2:
                print("td get failed")
                continue
            print(tds[0].text + " : ")
            for tag in tds[1].find_all('a'):
                tag_name = tag.text
                tag_url = tag.get('href') or ""
                print(" " + tag_name + " " + tag_url)