Files
CollectMangaInfo/GetArc_Ehentai.py
2024-04-05 21:51:36 +09:00

116 lines
2.6 KiB
Python

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
# Module-level list, initialised empty. None of the functions visible in
# this section reads or appends to it -- presumably meant to collect parsed
# results; TODO confirm against callers.
listResult = []
def GetSearchResult(searchWord):
    """Load the E-hentai front page in Chrome and print the galleries found.

    Starts a Chrome WebDriver session, waits up to 30 seconds for an element
    with class ``dp`` to be present, then passes the page source to
    ``parseMangaInfos``. The browser session is closed on every path,
    including unexpected errors while loading or waiting.

    Args:
        searchWord: Search term supplied by the caller.
            NOTE(review): currently unused -- the request always goes to the
            front page. The original had the search URL
            (``getSiteUrl() + searchWord``) commented out; confirm whether
            it should be restored.

    Returns:
        None. On timeout a message is printed and nothing is parsed.
    """
    print("E-hentai start")
    url = "https://e-hentai.org/"
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # Wait until the web page has loaded.
        try:
            WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'dp'))
            )
        except TimeoutException:
            print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
            return
        strContent = driver.page_source
    finally:
        # Always release the browser; previously any exception other than
        # TimeoutException left the Chrome process running.
        driver.quit()
    parseMangaInfos(strContent)
#
def getSiteUrl():
    """Return the E-hentai search URL prefix, ending in ``f_search=``."""
    return "https://e-hentai.org/?f_search="
#
def parseMangaInfos(html_doc):
    """Parse a gallery-list page and print the details of every table row.

    Looks up the ``<table>`` whose class attribute is ``itg gltc`` and runs
    the three cell parsers (``gl1cParse``, ``gl2cParse``, ``gl3cParse``) on
    each ``<tr>`` it contains.

    Args:
        html_doc: HTML markup of the page to parse.

    Returns:
        None. All output is printed. If the table is absent a message is
        printed and the function returns without parsing anything.
    """
    # Build the BeautifulSoup object.
    soup = BeautifulSoup(html_doc, 'html.parser')
    gallery_table = soup.find('table', class_='itg gltc')
    if gallery_table is None:
        # find() returns None when nothing matches; without this guard the
        # find_all() call below raised AttributeError.
        print("gallery table (itg gltc) not found")
        return
    for gl in gallery_table.find_all('tr'):
        gl1cParse(gl)
        gl2cParse(gl)
        gl3cParse(gl)
        # Separator after each row's output.
        print("\r\n")
# type
def gl1cParse(soup_element):
    """Print the type shown in a row's ``gl1c glcat`` cell.

    Args:
        soup_element: Element for one table row; must offer ``find``.

    Prints ``type : <text>`` using the text of the first ``<div>`` in the
    cell. Rows without such a cell produce no output.
    """
    category_cell = soup_element.find('td', class_='gl1c glcat')
    if category_cell is not None:
        print("type : " + category_cell.find('div').text)
# torrent
def gl2cParse(soup_element):
    """Print the torrent link found in a row's ``gl2c`` cell.

    Args:
        soup_element: Element for one table row; must offer ``find``.

    Prints ``torrent : <href>`` when the cell's ``gldown`` div contains an
    ``<a>`` with an ``href``; prints ``torrent : none`` otherwise. Rows
    without a ``gl2c`` cell produce no output.
    """
    element = soup_element.find('td', class_='gl2c')
    if element is None:
        return
    trt_btn = element.find('div', class_='gldown')
    # A missing download div, or an anchor without href, is reported the same
    # way as a missing anchor instead of raising AttributeError / TypeError.
    trt_url = trt_btn.find('a') if trt_btn is not None else None
    url = trt_url.get('href') if trt_url is not None else None
    if url is not None:
        print("torrent : " + url)
    else:
        print("torrent : none")
#
def gl3cParse(soup_element):
    """Print title, URL and tags from a row's ``gl3c glname`` cell.

    Args:
        soup_element: Element for one table row; must offer ``find``.

    Output, in order: the text of the ``glink`` div, the ``href`` of the
    first ``<a>``, the ``title`` attribute of every ``gt`` div, and finally
    the number of tags. Rows without the cell produce no output.
    """
    name_cell = soup_element.find('td', class_='gl3c glname')
    if name_cell is None:
        return

    gallery_url = name_cell.find('a').get('href')
    gallery_title = name_cell.find('div', class_='glink').text
    print("title : " + gallery_title)
    print("Url : " + gallery_url)

    print("tags : ")
    tag_titles = [
        tag_div.get('title')
        for tag_div in name_cell.find_all('div', class_='gt')
    ]
    for tag_title in tag_titles:
        print(" " + tag_title)
    print(len(tag_titles))