first added
This commit is contained in:
118
GetArc_Ehentai.py
Normal file
118
GetArc_Ehentai.py
Normal file
@@ -0,0 +1,118 @@
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.common.exceptions import TimeoutException
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
class MangaMetaInfo:
    """Lightweight record describing one gallery search result.

    Holds the gallery title, its page URL, the language label, the
    manga/doujin type label, and any number of trailing tag strings
    (collected into a tuple via *tags).
    """

    def __init__(self, title, url, lang, manType, *tags):
        # Pair up the simple fields, then keep the variadic tags as-is.
        self.title, self.url = title, url
        self.lang, self.manType = lang, manType
        self.tags = tags
|
||||

# Metadata fields to capture per gallery entry:
#   series, type, languages, tags

class MangaInfo:
    """Scrape a gallery search-result page with Selenium and print the
    parsed metadata (title, URL, artists, tag table) for each gallery
    block found via BeautifulSoup.
    """

    # NOTE(review): class-level mutable attribute — shared by ALL
    # instances. It is never written by the visible code; kept only for
    # backward compatibility with any external reader.
    listResult = []

    def GetSearchResult(self, searchWord):
        """Open the search page for *searchWord*, wait for results to
        render, then parse and print the gallery metadata.

        Returns None. Prints a Korean error message and aborts if the
        result elements never appear within 30 seconds.
        """
        url = self.getSiteUrl() + searchWord
        driver = webdriver.Chrome()
        try:
            driver.get(url)

            # Wait until a result title element (class 'lillie') exists.
            # NOTE(review): the selectors used here ('lillie',
            # 'gallery-content', 'dj') do not look like e-hentai markup
            # even though the base URL targets e-hentai — confirm which
            # site this is really meant to scrape.
            try:
                WebDriverWait(driver, 30).until(
                    EC.presence_of_element_located((By.CLASS_NAME, 'lillie'))
                )
            except TimeoutException:
                print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
                return

            strContent = driver.page_source
        finally:
            # Always release the browser: the original code leaked the
            # driver if get()/page_source raised anything unexpected.
            driver.quit()

        self.parseMangaInfos(strContent)

    def getSiteUrl(self):
        """Return the base search URL that the query word is appended to."""
        return "https://e-hentai.org/search.html?"

    def parseMangaInfos(self, html_doc):
        """Parse the page HTML and print metadata for every gallery block."""
        soup = BeautifulSoup(html_doc, 'html.parser')
        for element in soup.find_all(class_='gallery-content'):
            self.djParse(element)

    def djParse(self, soup_element):
        """Print title, artist list, and description table for each
        '.dj' child of *soup_element*, separated by a blank line."""
        for child in soup_element.find_all(class_='dj'):
            self.djtitleParse(child)
            self.artistlistParse(child)
            self.djDescParse(child)
            print("\r\n")

    def djtitleParse(self, soup_element):
        """Print the gallery title and its link URL from 'h1.lillie'."""
        element = soup_element.find('h1', class_='lillie')
        if element is None:
            # Markup mismatch — original crashed with AttributeError here.
            return
        title = element.text

        a_tag = element.find('a')
        a_url = a_tag.get('href') if a_tag is not None else ""

        print("title : " + title)
        print("URL : " + a_url)  # label typo fixed: was "URl : "

    def artistlistParse(self, soup_element):
        """Print each artist name and link found in 'div.artist-list'."""
        element = soup_element.find('div', class_='artist-list')

        print("artists")

        if element is None:
            # Markup mismatch — original crashed with AttributeError here.
            return
        for tag in element.find_all('a'):
            artist = tag.text
            a_url = tag.get('href')
            print(" " + artist + " " + a_url)

    def djDescParse(self, soup_element):
        """Print every row of the 'table.dj-desc' metadata table as a
        'label :' line followed by the linked tag names and URLs."""
        element = soup_element.find('table', class_='dj-desc')
        if element is None:
            # Markup mismatch — original crashed with AttributeError here.
            return
        for row in element.find_all('tr'):
            tds = row.find_all('td')
            # Expect exactly [label, links] cells; skip malformed rows.
            if 2 != len(tds):
                print("td get failed")
                continue

            print(tds[0].text + " : ")

            for tag in tds[1].find_all('a'):
                tag_name = tag.text
                tag_url = tag.get('href')
                print(" " + tag_name + " " + tag_url)
|
||||
|
||||
Reference in New Issue
Block a user