Update DataClass.py, GetArc_Ehentai.py, and 3 more files...

E-hentai page parsing implemented.
Added DataClass.
This commit is contained in:
2024-04-05 21:51:36 +09:00
parent dbe5377d6f
commit 809748a73a
5 changed files with 202 additions and 192 deletions

View File

@@ -21,98 +21,96 @@ class MangaMetaInfo:
#tags
# Container for manga search results scraped from the site.
class MangaInfo:
# NOTE(review): 'listResult' appears twice because the diff viewer renders
# both the removed and the added copy of the same line — only one belongs
# in the real file.
listResult = []
listResult = []
# NOTE(review): this is the pre-change (removed) method version of
# GetSearchResult as rendered by the diff viewer; its remaining body lines
# are scattered further down the diff, so this span is incomplete as shown.
def GetSearchResult(self, searchWord):
# Build the search URL and fetch the page with a Selenium Chrome driver.
url = self.getSiteUrl() + searchWord
driver = webdriver.Chrome()
driver.get(url)
# Wait until the page has loaded (comment translated from Korean)
try:
WebDriverWait(driver, 30).until(
EC.presence_of_element_located((By.CLASS_NAME, 'lillie'))
)
except TimeoutException:
print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
driver.quit()
return
# Capture the rendered HTML for parsing.
strContent = driver.page_source
# NOTE(review): post-change, module-level version of GetSearchResult as
# rendered by the diff viewer; lines from the removed method version are
# interleaved below — do not read this span as one coherent function.
def GetSearchResult(searchWord):
url = getSiteUrl() + searchWord
driver = webdriver.Chrome()
driver.get(url)
# Wait until the page has loaded (comment translated from Korean)
try:
WebDriverWait(driver, 30).until(
EC.presence_of_element_located((By.CLASS_NAME, 'lillie'))
)
except TimeoutException:
print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
driver.quit()
return
# The two lines below belong to the removed method version (diff residue).
self.parseMangaInfos(strContent)
pass
#
def getSiteUrl(self):
    """Return the base search URL of the target site."""
    return "https://hitomi.la/search.html?"
# Diff residue: tail of the added GetSearchResult (capture the page source,
# then shut the driver down).
strContent = driver.page_source
driver.quit()
#
def parseMangaInfos(self, html_doc):
    """Parse the search-result HTML and process each gallery container."""
    # Build the soup once, then hand every gallery container to djParse.
    soup = BeautifulSoup(html_doc, 'html.parser')
    for gallery in soup.find_all(class_='gallery-content'):
        self.djParse(gallery)
# Diff residue: final call of the added GetSearchResult.
parseMangaInfos(strContent)
def djParse(self, soup_element):
    """Parse every '.dj' entry found inside a gallery container."""
    # NOTE(review): indentation was lost in the diff rendering; the
    # separator print is assumed to follow each entry — confirm placement.
    for entry in soup_element.find_all(class_='dj'):
        self.djtitleParse(entry)
        self.artistlistParse(entry)
        self.djDescParse(entry)
        print("\r\n")
# NOTE(review): removed-version djtitleParse; its remaining body lines are
# interleaved further down in the diff, so this span is incomplete as shown.
def djtitleParse(self, soup_element):
element = soup_element.find('h1', class_='lillie')
title = element.text
pass
#
def getSiteUrl():
    """Return the base search URL of the target site.

    NOTE(review): the diff rendering interleaved lines of the removed
    djtitleParse method into this span; only the URL constant and the
    return statement belong to this function, reconstructed here.
    """
    strRet = "https://hitomi.la/search.html?"
    return strRet
#
def parseMangaInfos(html_doc):
    """Parse the search-result HTML and process each gallery container."""
    soup = BeautifulSoup(html_doc, 'html.parser')
    for gallery in soup.find_all(class_='gallery-content'):
        djParse(gallery)
# NOTE(review): this span interleaves the removed artistlistParse method
# with the added module-level djParse; neither definition is complete here
# and the lines must not be read as one coherent function.
def artistlistParse(self, soup_element):
element = soup_element.find('div', class_='artist-list')
def djParse(soup_element):
childs = soup_element.find_all(class_='dj')
for child in childs:
djtitleParse(child)
artistlistParse(child)
djDescParse(child)
print("artists")
print("\r\n")
a_tags = element.find_all('a')
def djtitleParse(soup_element):
    """Print the title and link URL of a single '.dj' entry."""
    heading = soup_element.find('h1', class_='lillie')
    link = heading.find('a')
    print("title : " + heading.text)
    print("URl : " + link.get('href'))
def artistlistParse(soup_element):
    """Print each artist name and link found in the '.artist-list' div."""
    artist_div = soup_element.find('div', class_='artist-list')
    print("artists")
    for anchor in artist_div.find_all('a'):
        print(" " + anchor.text + " " + anchor.get('href'))
def djDescParse(soup_element):
    """Print each metadata row (label plus linked tags) of the '.dj-desc' table.

    NOTE(review): the diff rendering fused removed-method lines into this
    function and pushed its inner-loop tail further down; the body below is
    reconstructed from the added lines of the diff.
    """
    element = soup_element.find('table', class_='dj-desc')
    tb_rows = element.find_all('tr')
    for row in tb_rows:
        tds = row.find_all('td')
        # Each metadata row is expected to have exactly a label cell and a value cell.
        if 2 != len(tds):
            print("td get failed")
            continue
        print(tds[0].text + " : ")
        a_tags = tds[1].find_all('a')
        for tag in a_tags:
            tag_name = tag.text
            tag_url = tag.get('href')
            print(" " + tag_name + " " + tag_url)
def djDescParse(self, soup_element):
    """Print each metadata row (label plus linked tags) of the '.dj-desc' table."""
    desc_table = soup_element.find('table', class_='dj-desc')
    for row in desc_table.find_all('tr'):
        cells = row.find_all('td')
        # A valid metadata row has exactly a label cell and a value cell.
        if len(cells) != 2:
            print("td get failed")
            continue
        print(cells[0].text + " : ")
        for anchor in cells[1].find_all('a'):
            print(" " + anchor.text + " " + anchor.get('href'))
# Diff residue: inner-loop tail of the added djDescParse (prints one
# linked tag name and URL per iteration).
tag_name = tag.text
tag_url = tag.get('href')
print(" " + tag_name + " " + tag_url)
pass