Update DataClass.py, GetArc_Ehentai.py, and 3 more files...
E-hentai Page Parse DataClass added
This commit is contained in:
13
DataClass.py
Normal file
13
DataClass.py
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
class MangaMetaInfo:
    """Plain record holding the metadata collected for one manga entry.

    The constructor stores its arguments unchanged on the instance; no
    validation or conversion is performed.

    Attributes:
        title: Value passed as ``title``.
        url: Value passed as ``url``.
        lang: Value passed as ``lang``.
        manType: Value passed as ``manType`` (presumably the gallery
            category/type - confirm against the parser that fills it).
        tags: Tuple of every extra positional argument, in call order;
            an empty tuple when none are given.
    """

    def __init__(self, title, url, lang, manType, *tags):
        """Store the given metadata on the instance.

        Args:
            title: Title of the entry.
            url: URL of the entry.
            lang: Language of the entry.
            manType: Type of the entry.
            *tags: Zero or more tags. To pass an existing list, unpack it
                at the call site (``MangaMetaInfo(t, u, l, m, *tag_list)``);
                a list passed as a single argument is stored as one tag.
        """
        self.title = title
        self.url = url
        self.lang = lang
        self.manType = manType
        # *tags arrives as a tuple, so the stored value is immutable.
        self.tags = tags

    def __repr__(self):
        """Return a debugging representation listing every stored field."""
        return "{}(title={!r}, url={!r}, lang={!r}, manType={!r}, tags={!r})".format(
            type(self).__name__,
            self.title,
            self.url,
            self.lang,
            self.manType,
            self.tags,
        )
|
||||||
|
|
||||||
|
#series
|
||||||
|
#type
|
||||||
|
#languages
|
||||||
|
#tags
|
||||||
@@ -6,34 +6,19 @@ from selenium.common.exceptions import TimeoutException
|
|||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
class MangaMetaInfo:
|
|
||||||
def __init__(self, title, url, lang, manType, *tags):
|
|
||||||
self.title = title
|
|
||||||
self.url = url
|
|
||||||
self.lang = lang
|
|
||||||
self.manType = manType
|
|
||||||
self.tags = tags
|
|
||||||
pass
|
|
||||||
|
|
||||||
#series
|
|
||||||
#type
|
|
||||||
#languages
|
|
||||||
#tags
|
|
||||||
|
|
||||||
|
|
||||||
class MangaInfo:
|
|
||||||
|
|
||||||
listResult = []
|
listResult = []
|
||||||
|
|
||||||
def GetSearchResult(self, searchWord):
|
def GetSearchResult(searchWord):
|
||||||
url = self.getSiteUrl() + searchWord
|
print("E-hentai start")
|
||||||
|
#url = getSiteUrl() + searchWord
|
||||||
|
url = "https://e-hentai.org/"
|
||||||
driver = webdriver.Chrome()
|
driver = webdriver.Chrome()
|
||||||
driver.get(url)
|
driver.get(url)
|
||||||
|
|
||||||
# 웹페이지가 로드될 때까지 기다리기
|
# 웹페이지가 로드될 때까지 기다리기
|
||||||
try:
|
try:
|
||||||
WebDriverWait(driver, 30).until(
|
WebDriverWait(driver, 30).until(
|
||||||
EC.presence_of_element_located((By.CLASS_NAME, 'lillie'))
|
EC.presence_of_element_located((By.CLASS_NAME, 'dp'))
|
||||||
)
|
)
|
||||||
except TimeoutException:
|
except TimeoutException:
|
||||||
print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
|
print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
|
||||||
@@ -43,76 +28,88 @@ class MangaInfo:
|
|||||||
strContent = driver.page_source
|
strContent = driver.page_source
|
||||||
driver.quit()
|
driver.quit()
|
||||||
|
|
||||||
self.parseMangaInfos(strContent)
|
parseMangaInfos(strContent)
|
||||||
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
#
|
#
|
||||||
def getSiteUrl(self):
|
def getSiteUrl():
|
||||||
strRet = "https://e-hentai.org/search.html?"
|
strRet = "https://e-hentai.org/?f_search="
|
||||||
|
|
||||||
return strRet
|
return strRet
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
def parseMangaInfos(self, html_doc):
|
def parseMangaInfos(html_doc):
|
||||||
# BeautifulSoup 객체 생성
|
# BeautifulSoup 객체 생성
|
||||||
soup = BeautifulSoup(html_doc, 'html.parser')
|
soup = BeautifulSoup(html_doc, 'html.parser')
|
||||||
gallery_elements = soup.find_all(class_='gallery-content')
|
gallery_table = soup.find('table', class_='itg gltc')
|
||||||
|
gls = gallery_table.find_all('tr')
|
||||||
for element in gallery_elements:
|
|
||||||
self.djParse(element)
|
|
||||||
|
|
||||||
|
|
||||||
def djParse(self, soup_element):
|
|
||||||
childs = soup_element.find_all(class_='dj')
|
|
||||||
|
|
||||||
for child in childs:
|
|
||||||
self.djtitleParse(child)
|
|
||||||
self.artistlistParse(child)
|
|
||||||
self.djDescParse(child)
|
|
||||||
|
|
||||||
|
idx = 1;
|
||||||
|
for gl in gls:
|
||||||
|
gl1cParse(gl)
|
||||||
|
gl2cParse(gl)
|
||||||
|
gl3cParse(gl)
|
||||||
print("\r\n")
|
print("\r\n")
|
||||||
|
|
||||||
|
|
||||||
def djtitleParse(self, soup_element):
|
# type
|
||||||
element = soup_element.find('h1', class_='lillie')
|
def gl1cParse(soup_element):
|
||||||
title = element.text
|
element = soup_element.find('td', class_='gl1c glcat')
|
||||||
|
|
||||||
a_tag = element.find('a')
|
if element is None:
|
||||||
a_url = a_tag.get('href')
|
return
|
||||||
|
|
||||||
print("title : " + title)
|
man_type = element.find('div')
|
||||||
print("URl : " + a_url)
|
|
||||||
|
|
||||||
def artistlistParse(self, soup_element):
|
print("type : " + man_type.text)
|
||||||
element = soup_element.find('div', class_='artist-list')
|
|
||||||
|
|
||||||
print("artists")
|
# torrent
|
||||||
|
def gl2cParse(soup_element):
|
||||||
|
element = soup_element.find('td', class_='gl2c')
|
||||||
|
|
||||||
a_tags = element.find_all('a')
|
if element is None:
|
||||||
for tag in a_tags:
|
return
|
||||||
artist = tag.text
|
|
||||||
a_url = tag.get('href')
|
trt_btn = element.find('div', class_='gldown')
|
||||||
print(" " + artist + " " + a_url)
|
trt_url = trt_btn.find('a')
|
||||||
|
|
||||||
|
if trt_url:
|
||||||
|
url = trt_url.get('href')
|
||||||
|
print("torrent : " + url)
|
||||||
|
else:
|
||||||
|
print("torrent : none")
|
||||||
|
|
||||||
|
|
||||||
def djDescParse(self, soup_element):
|
#
|
||||||
element = soup_element.find('table', class_='dj-desc')
|
def gl3cParse(soup_element):
|
||||||
tb_rows = element.find_all('tr')
|
element = soup_element.find('td', class_='gl3c glname')
|
||||||
for row in tb_rows:
|
|
||||||
tds = row.find_all('td')
|
if element is None:
|
||||||
if 2 != len(tds):
|
return
|
||||||
print("td get failed")
|
|
||||||
continue
|
elemenr_url = element.find('a')
|
||||||
|
man_url = elemenr_url.get('href')
|
||||||
|
|
||||||
|
element_title = element.find('div', class_='glink')
|
||||||
|
man_title = element_title.text
|
||||||
|
|
||||||
|
print("title : " + man_title)
|
||||||
|
print("Url : " + man_url)
|
||||||
|
|
||||||
|
print("tags : ")
|
||||||
|
tags = element.find_all('div', class_='gt')
|
||||||
|
man_tags = []
|
||||||
|
for tag in tags:
|
||||||
|
man_tag = tag.get('title')
|
||||||
|
print(" " + man_tag)
|
||||||
|
man_tags.append(man_tag)
|
||||||
|
|
||||||
|
print(len(man_tags))
|
||||||
|
|
||||||
|
|
||||||
print(tds[0].text + " : ")
|
|
||||||
|
|
||||||
a_tags = tds[1].find_all('a')
|
|
||||||
for tag in a_tags:
|
|
||||||
tag_name = tag.text
|
|
||||||
tag_url = tag.get('href')
|
|
||||||
print(" " + tag_name + " " + tag_url)
|
|
||||||
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
@@ -21,12 +21,10 @@ class MangaMetaInfo:
|
|||||||
#tags
|
#tags
|
||||||
|
|
||||||
|
|
||||||
class MangaInfo:
|
|
||||||
|
|
||||||
listResult = []
|
listResult = []
|
||||||
|
|
||||||
def GetSearchResult(self, searchWord):
|
def GetSearchResult(searchWord):
|
||||||
url = self.getSiteUrl() + searchWord
|
url = getSiteUrl() + searchWord
|
||||||
driver = webdriver.Chrome()
|
driver = webdriver.Chrome()
|
||||||
driver.get(url)
|
driver.get(url)
|
||||||
|
|
||||||
@@ -43,39 +41,39 @@ class MangaInfo:
|
|||||||
strContent = driver.page_source
|
strContent = driver.page_source
|
||||||
driver.quit()
|
driver.quit()
|
||||||
|
|
||||||
self.parseMangaInfos(strContent)
|
parseMangaInfos(strContent)
|
||||||
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
#
|
#
|
||||||
def getSiteUrl(self):
|
def getSiteUrl():
|
||||||
strRet = "https://hitomi.la/search.html?"
|
strRet = "https://hitomi.la/search.html?"
|
||||||
|
|
||||||
return strRet
|
return strRet
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
def parseMangaInfos(self, html_doc):
|
def parseMangaInfos(html_doc):
|
||||||
# BeautifulSoup 객체 생성
|
# BeautifulSoup 객체 생성
|
||||||
soup = BeautifulSoup(html_doc, 'html.parser')
|
soup = BeautifulSoup(html_doc, 'html.parser')
|
||||||
gallery_elements = soup.find_all(class_='gallery-content')
|
gallery_elements = soup.find_all(class_='gallery-content')
|
||||||
|
|
||||||
for element in gallery_elements:
|
for element in gallery_elements:
|
||||||
self.djParse(element)
|
djParse(element)
|
||||||
|
|
||||||
|
|
||||||
def djParse(self, soup_element):
|
def djParse(soup_element):
|
||||||
childs = soup_element.find_all(class_='dj')
|
childs = soup_element.find_all(class_='dj')
|
||||||
|
|
||||||
for child in childs:
|
for child in childs:
|
||||||
self.djtitleParse(child)
|
djtitleParse(child)
|
||||||
self.artistlistParse(child)
|
artistlistParse(child)
|
||||||
self.djDescParse(child)
|
djDescParse(child)
|
||||||
|
|
||||||
print("\r\n")
|
print("\r\n")
|
||||||
|
|
||||||
|
|
||||||
def djtitleParse(self, soup_element):
|
def djtitleParse(soup_element):
|
||||||
element = soup_element.find('h1', class_='lillie')
|
element = soup_element.find('h1', class_='lillie')
|
||||||
title = element.text
|
title = element.text
|
||||||
|
|
||||||
@@ -85,7 +83,7 @@ class MangaInfo:
|
|||||||
print("title : " + title)
|
print("title : " + title)
|
||||||
print("URl : " + a_url)
|
print("URl : " + a_url)
|
||||||
|
|
||||||
def artistlistParse(self, soup_element):
|
def artistlistParse(soup_element):
|
||||||
element = soup_element.find('div', class_='artist-list')
|
element = soup_element.find('div', class_='artist-list')
|
||||||
|
|
||||||
print("artists")
|
print("artists")
|
||||||
@@ -97,7 +95,7 @@ class MangaInfo:
|
|||||||
print(" " + artist + " " + a_url)
|
print(" " + artist + " " + a_url)
|
||||||
|
|
||||||
|
|
||||||
def djDescParse(self, soup_element):
|
def djDescParse(soup_element):
|
||||||
element = soup_element.find('table', class_='dj-desc')
|
element = soup_element.find('table', class_='dj-desc')
|
||||||
tb_rows = element.find_all('tr')
|
tb_rows = element.find_all('tr')
|
||||||
for row in tb_rows:
|
for row in tb_rows:
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ xls_name = "mangaDB.xlsx"
|
|||||||
list_MetaInfo = []
|
list_MetaInfo = []
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
def GetCurrentTime():
|
def GetCurrentTime():
|
||||||
# 현재 시간을 구하고 구조체로 변환
|
# 현재 시간을 구하고 구조체로 변환
|
||||||
@@ -38,7 +37,6 @@ def GetCurrentTime():
|
|||||||
# wb = Workbook()
|
# wb = Workbook()
|
||||||
# print("xls Created")
|
# print("xls Created")
|
||||||
|
|
||||||
|
|
||||||
# ws = wb.active
|
# ws = wb.active
|
||||||
# # time, title, url, tags (comma)
|
# # time, title, url, tags (comma)
|
||||||
# ws['A1'] = "Modified Time"
|
# ws['A1'] = "Modified Time"
|
||||||
|
|||||||
10
main.py
10
main.py
@@ -1,8 +1,12 @@
|
|||||||
import GetArc_Hitomi as gethitomi
|
import GetArc_Hitomi as getHitomi
|
||||||
|
import GetArc_Ehentai as getEhentai
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
obj = gethitomi.MangaInfo()
|
#getHitomi.GetSearchResult("trouble sweets")
|
||||||
obj.GetSearchResult("trouble sweets")
|
getEhentai.GetSearchResult("artist%3A%22kotomi+yo-ji%24%22")
|
||||||
|
|
||||||
|
#artist:"kotomi yo-ji$"
|
||||||
|
#"artist%3A%22kotomi+yo-ji%24%22"
|
||||||
|
|
||||||
|
|
||||||
# For Main Loop
|
# For Main Loop
|
||||||
|
|||||||
Reference in New Issue
Block a user