CollectMangaInfo/GetArc_Ehentai.py
Lee Young Hoon e9f8a7323c Update DataClass.py, GetArc_Ehentai.py, and 7 more files...
Data class definitions, 퍼필 downloader JSON parser...
2024-05-13 21:48:40 +09:00

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
import urllib.parse
import UtilPack as util
import DataClass as info
#
def GetSearchResult(searchWord):
    url = getSiteUrl(searchWord)
    util.DbgOut("EHentai : " + url)
    driver = webdriver.Chrome()
    driver.get(url)
    # Wait until the page has loaded (the 'dp' element appears).
    try:
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'dp'))
        )
    except TimeoutException:
        util.DbgOut("The page did not load, or the element could not be found.")
        driver.quit()
        return
    strContent = driver.page_source
    driver.quit()
    parseMangaInfos(strContent)
def GetGalInfoFromID(gal_id):
    # For exhentai, query https://koromo.xyz/api/search/ehash?id=<number> to get
    # the gallery token, then open https://exhentai.org/g/<number>/<token>.
    # https://e-hentai.org/api/search/ehash?id=2890782
    pass
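# --- Hedged sketch (not in the original file): one possible implementation of the
# flow described in the comments above. It assumes the koromo.xyz endpoint returns
# JSON and that the gallery token sits under a "token" key; both that key name and
# this helper's name are hypothetical. Uses only the stdlib, so no new dependency.
import json
import urllib.request

def getGalUrlFromID_sketch(gal_id):
    api_url = "https://koromo.xyz/api/search/ehash?id=" + str(gal_id)
    with urllib.request.urlopen(api_url, timeout=10) as resp:
        data = json.loads(resp.read().decode("utf-8"))
    token = data.get("token")  # assumed response key; the real shape is unverified
    if token is None:
        return None
    # Gallery URL in the https://exhentai.org/g/<number>/<token> form.
    return "https://exhentai.org/g/{}/{}".format(gal_id, token)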
#
def getSiteUrl(searchWord):
    strRet = "https://e-hentai.org/"
    if not util.IsEmptyStr(searchWord):
        # URL-encode the query so spaces and special characters survive.
        strRet = strRet + "?f_search=" + urllib.parse.quote_plus(searchWord)
    return strRet
#
def parseMangaInfos(html_doc):
    # Build a BeautifulSoup tree and walk the gallery listing table.
    soup = BeautifulSoup(html_doc, 'html.parser')
    gallery_table = soup.find('table', class_='itg gltc')
    if gallery_table is None:
        util.DbgOut("EHentai : gallery table not found")
        return
    gls = gallery_table.find_all('tr')
    for gl in gls:
        gl1cParse(gl)
        gl2cParse(gl)
        gl3cParse(gl)
# type (gl1c column)
def gl1cParse(soup_element):
    element = soup_element.find('td', class_='gl1c glcat')
    if element is None:
        return  # header row or unexpected layout
    man_type = element.find('div')
    if man_type is not None:
        print("type : " + man_type.text)
# torrent (gl2c column)
def gl2cParse(soup_element):
    element = soup_element.find('td', class_='gl2c')
    if element is None:
        return
    trt_btn = element.find('div', class_='gldown')
    trt_url = trt_btn.find('a') if trt_btn else None
    if trt_url:
        url = trt_url.get('href')
        print("torrent : " + url)
    else:
        print("torrent : none")
#
def gl3cParse(soup_element):
    element = soup_element.find('td', class_='gl3c glname')
    if element is None:
        return
    element_url = element.find('a')
    element_title = element.find('div', class_='glink')
    if element_url is None or element_title is None:
        return
    man_url = element_url.get('href')
    man_title = element_title.text
    print("title : " + man_title)
    print("Url : " + man_url)
    print("tags : ")
    tags = element.find_all('div', class_='gt')
    man_tags = []
    for tag in tags:
        man_tag = tag.get('title')
        print("    " + man_tag)
        man_tags.append(man_tag)
    return man_tags
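# --- Hedged usage sketch (not in the original file): how this module might be
# driven directly. The sample search word is illustrative only.
if __name__ == "__main__":
    GetSearchResult("language:korean")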