from urllib.parse import quote_plus

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup

import UtilPack as util
import DataClass as info

# Fetch the search result page with Selenium and hand the HTML over to the parser.
def GetSearchResult(searchWord):
    url = getSiteUrl(searchWord)

    util.DbgOut("EHentai : " + url)

    driver = webdriver.Chrome()
    driver.get(url)

    # Wait until the web page has loaded
    try:
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'dp'))
        )
    except TimeoutException:
        util.DbgOut("The page did not load or the element could not be found.")
        driver.quit()
        return

    strContent = driver.page_source
    driver.quit()

    parseMangaInfos(strContent)

    pass

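# Optional sketch (not part of the original flow): a headless-Chrome driver factory.
# Scraping jobs like GetSearchResult usually don't need a visible browser window;
# Selenium 4's ChromeOptions with the "--headless=new" argument covers that.
# The helper name below is my own.
def makeHeadlessDriver():
    options = webdriver.ChromeOptions()
    options.add_argument("--headless=new")
    return webdriver.Chrome(options=options)
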
def GetGalInfoFromID(gal_id):
    # ExHentai: query https://koromo.xyz/api/search/ehash?id=<number> to get the token value,
    # then open https://exhentai.org/g/<number>/<token>
    # e.g. https://e-hentai.org/api/search/ehash?id=2890782
    pass

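# A rough sketch of the lookup described in GetGalInfoFromID's comment, assuming the
# koromo.xyz endpoint returns a JSON object containing the gallery token (the 'token'
# key below is a guess; the actual response shape is unverified). Uses the requests
# library; the helper name is my own.
def getGalleryUrlFromId(gal_id):
    import requests

    resp = requests.get("https://koromo.xyz/api/search/ehash",
                        params={"id": gal_id}, timeout=10)
    data = resp.json()

    token = data.get("token")  # assumed key name; adjust to the real response
    if token is None:
        return None

    return "https://exhentai.org/g/" + str(gal_id) + "/" + token
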
# Build the search URL for the given keyword.
def getSiteUrl(searchWord):
    strRet = "https://e-hentai.org/"

    if not util.IsEmptyStr(searchWord):
        strRet = strRet + "?f_search=" + quote_plus(searchWord)

    return strRet

# Parse the gallery list table out of the search result HTML.
def parseMangaInfos(html_doc):
    # Create a BeautifulSoup object
    soup = BeautifulSoup(html_doc, 'html.parser')
    gallery_table = soup.find('table', class_='itg gltc')

    if gallery_table is None:
        util.DbgOut("EHentai : result table not found")
        return

    gls = gallery_table.find_all('tr')

    for gl in gls:
        gl1cParse(gl)
        gl2cParse(gl)
        gl3cParse(gl)

# type column (gl1c glcat): gallery category
def gl1cParse(soup_element):
    element = soup_element.find('td', class_='gl1c glcat')

    if element is None:
        return

    man_type = element.find('div')

    print("type : " + man_type.text)

# torrent column (gl2c): torrent download link, if any
def gl2cParse(soup_element):
    element = soup_element.find('td', class_='gl2c')

    if element is None:
        return

    trt_btn = element.find('div', class_='gldown')

    if trt_btn is None:
        print("torrent : none")
        return

    trt_url = trt_btn.find('a')

    if trt_url:
        url = trt_url.get('href')
        print("torrent : " + url)
    else:
        print("torrent : none")

# name column (gl3c glname): title, gallery URL and tags
def gl3cParse(soup_element):
    element = soup_element.find('td', class_='gl3c glname')

    if element is None:
        return

    element_url = element.find('a')
    man_url = element_url.get('href')

    element_title = element.find('div', class_='glink')
    man_title = element_title.text

    print("title : " + man_title)
    print("Url : " + man_url)

    print("tags : ")
    tags = element.find_all('div', class_='gt')
    man_tags = []
    for tag in tags:
        man_tag = tag.get('title')
        print(" " + man_tag)
        man_tags.append(man_tag)

    pass
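
# Usage sketch (not part of the original file): run a quick search from the command
# line. Assumes chromedriver is available on PATH for Selenium; the keyword is a
# placeholder.
if __name__ == "__main__":
    GetSearchResult("sample keyword")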