first added

2024-04-05 02:25:55 +09:00
commit dbe5377d6f
4 changed files with 322 additions and 0 deletions
--- a/GetArc_Ehentai.py
+++ b/GetArc_Ehentai.py
@@ -0,0 +1,118 @@
 from selenium import webdriver
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.common.exceptions import TimeoutException
 from bs4 import BeautifulSoup
 class MangaMetaInfo:
    """Plain record holding the metadata of one gallery entry.

    The constructor stores its arguments unchanged on the instance:
    ``title``, ``url``, ``lang``, ``manType`` and ``tags``. Every extra
    positional argument after ``manType`` is collected into the ``tags``
    tuple.
    """

    def __init__(self, title, url, lang, manType, *tags):
        self.title, self.url = title, url
        self.lang, self.manType = lang, manType
        # *tags arrives as a tuple and is kept as-is (empty tuple if none).
        self.tags = tags
    # Field notes left by the original author: series, type, languages, tags
 class MangaInfo:
    """Load a search page in Chrome via Selenium and print the parsed entries.

    ``GetSearchResult`` is the entry point; the remaining methods walk the
    downloaded HTML with BeautifulSoup and print what they find. Nothing is
    returned or stored by the parsing methods.

    NOTE(review): the ``search.html?`` path and the 'lillie' /
    'gallery-content' / 'dj' selectors are identical to the ones in
    GetArc_Hitomi.py -- confirm they are actually valid for e-hentai.org.
    """

    # Class-level default kept so ``MangaInfo.listResult`` still resolves;
    # __init__ gives every instance its own list so results are never shared.
    listResult = []

    def __init__(self):
        self.listResult = []

    def GetSearchResult(self, searchWord):
        """Open the search page for ``searchWord`` and print the parsed results.

        ``searchWord`` is appended verbatim to ``getSiteUrl()`` (no URL
        encoding is applied here). If the expected element does not appear
        within 30 seconds a message is printed and the method returns
        without parsing. The browser is always closed, even when Selenium
        raises.
        """
        url = self.getSiteUrl() + searchWord
        driver = webdriver.Chrome()
        try:
            driver.get(url)
            # Wait until the page has loaded (an element with class 'lillie' exists).
            try:
                WebDriverWait(driver, 30).until(
                    EC.presence_of_element_located((By.CLASS_NAME, 'lillie'))
                )
            except TimeoutException:
                print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
                return
            strContent = driver.page_source
        finally:
            # Runs on every exit path so the Chrome process is never leaked.
            driver.quit()
        self.parseMangaInfos(strContent)

    def getSiteUrl(self):
        """Return the search URL prefix that the search word is appended to."""
        return "https://e-hentai.org/search.html?"

    def parseMangaInfos(self, html_doc):
        """Parse ``html_doc`` and print every entry under 'gallery-content'."""
        # Create the BeautifulSoup object.
        soup = BeautifulSoup(html_doc, 'html.parser')
        for element in soup.find_all(class_='gallery-content'):
            self.djParse(element)

    def djParse(self, soup_element):
        """Print title, artists and description of each 'dj' child element."""
        for child in soup_element.find_all(class_='dj'):
            self.djtitleParse(child)
            self.artistlistParse(child)
            self.djDescParse(child)
            print("\r\n")

    def djtitleParse(self, soup_element):
        """Print the title text and link of the entry's <h1 class="lillie">."""
        element = soup_element.find('h1', class_='lillie')
        if element is None:
            # find() returns None when nothing matches; skip instead of crashing.
            print("title get failed")
            return
        a_tag = element.find('a')
        a_url = a_tag.get('href') if a_tag is not None else None
        print("title : " + element.text)
        # A missing <a> or href prints as an empty URL rather than raising.
        print("URl : " + (a_url or ""))

    def artistlistParse(self, soup_element):
        """Print name and link of every <a> inside <div class="artist-list">."""
        element = soup_element.find('div', class_='artist-list')
        print("artists")
        if element is None:
            return
        for tag in element.find_all('a'):
            print("    " + tag.text + " " + (tag.get('href') or ""))

    def djDescParse(self, soup_element):
        """Print each two-cell row of <table class="dj-desc"> with its links."""
        element = soup_element.find('table', class_='dj-desc')
        if element is None:
            print("dj-desc get failed")
            return
        for row in element.find_all('tr'):
            tds = row.find_all('td')
            # Only rows made of exactly a label cell and a value cell are handled.
            if len(tds) != 2:
                print("td get failed")
                continue
            print(tds[0].text + " : ")
            for tag in tds[1].find_all('a'):
                print("        " + tag.text + " " + (tag.get('href') or ""))
--- a/GetArc_Hitomi.py
+++ b/GetArc_Hitomi.py
@@ -0,0 +1,118 @@
 from selenium import webdriver
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.common.exceptions import TimeoutException
 from bs4 import BeautifulSoup
 class MangaMetaInfo:
    """Plain record holding the metadata of one gallery entry.

    The constructor stores its arguments unchanged on the instance:
    ``title``, ``url``, ``lang``, ``manType`` and ``tags``. Every extra
    positional argument after ``manType`` is collected into the ``tags``
    tuple.
    """

    def __init__(self, title, url, lang, manType, *tags):
        self.title, self.url = title, url
        self.lang, self.manType = lang, manType
        # *tags arrives as a tuple and is kept as-is (empty tuple if none).
        self.tags = tags
    # Field notes left by the original author: series, type, languages, tags
 class MangaInfo:
    """Load a Hitomi search page in Chrome via Selenium and print the entries.

    ``GetSearchResult`` is the entry point; the remaining methods walk the
    downloaded HTML with BeautifulSoup and print what they find. Nothing is
    returned or stored by the parsing methods.
    """

    # Class-level default kept so ``MangaInfo.listResult`` still resolves;
    # __init__ gives every instance its own list so results are never shared.
    listResult = []

    def __init__(self):
        self.listResult = []

    def GetSearchResult(self, searchWord):
        """Open the search page for ``searchWord`` and print the parsed results.

        ``searchWord`` is appended verbatim to ``getSiteUrl()`` (no URL
        encoding is applied here). If the expected element does not appear
        within 30 seconds a message is printed and the method returns
        without parsing. The browser is always closed, even when Selenium
        raises.
        """
        url = self.getSiteUrl() + searchWord
        driver = webdriver.Chrome()
        try:
            driver.get(url)
            # Wait until the page has loaded (an element with class 'lillie' exists).
            try:
                WebDriverWait(driver, 30).until(
                    EC.presence_of_element_located((By.CLASS_NAME, 'lillie'))
                )
            except TimeoutException:
                print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
                return
            strContent = driver.page_source
        finally:
            # Runs on every exit path so the Chrome process is never leaked.
            driver.quit()
        self.parseMangaInfos(strContent)

    def getSiteUrl(self):
        """Return the search URL prefix that the search word is appended to."""
        return "https://hitomi.la/search.html?"

    def parseMangaInfos(self, html_doc):
        """Parse ``html_doc`` and print every entry under 'gallery-content'."""
        # Create the BeautifulSoup object.
        soup = BeautifulSoup(html_doc, 'html.parser')
        for element in soup.find_all(class_='gallery-content'):
            self.djParse(element)

    def djParse(self, soup_element):
        """Print title, artists and description of each 'dj' child element."""
        for child in soup_element.find_all(class_='dj'):
            self.djtitleParse(child)
            self.artistlistParse(child)
            self.djDescParse(child)
            print("\r\n")

    def djtitleParse(self, soup_element):
        """Print the title text and link of the entry's <h1 class="lillie">."""
        element = soup_element.find('h1', class_='lillie')
        if element is None:
            # find() returns None when nothing matches; skip instead of crashing.
            print("title get failed")
            return
        a_tag = element.find('a')
        a_url = a_tag.get('href') if a_tag is not None else None
        print("title : " + element.text)
        # A missing <a> or href prints as an empty URL rather than raising.
        print("URl : " + (a_url or ""))

    def artistlistParse(self, soup_element):
        """Print name and link of every <a> inside <div class="artist-list">."""
        element = soup_element.find('div', class_='artist-list')
        print("artists")
        if element is None:
            return
        for tag in element.find_all('a'):
            print("    " + tag.text + " " + (tag.get('href') or ""))

    def djDescParse(self, soup_element):
        """Print each two-cell row of <table class="dj-desc"> with its links."""
        element = soup_element.find('table', class_='dj-desc')
        if element is None:
            print("dj-desc get failed")
            return
        for row in element.find_all('tr'):
            tds = row.find_all('td')
            # Only rows made of exactly a label cell and a value cell are handled.
            if len(tds) != 2:
                print("td get failed")
                continue
            print(tds[0].text + " : ")
            for tag in tds[1].find_all('a'):
                print("        " + tag.text + " " + (tag.get('href') or ""))
--- a/StoreXLS.py
+++ b/StoreXLS.py
@@ -0,0 +1,75 @@
 import os
 import time
 from openpyxl import Workbook
 from openpyxl import load_workbook
 from openpyxl.utils import get_column_letter
 # File name of the Excel workbook; presumably the file the commented-out
 # writer below is meant to open (that code refers to xls_path, not xls_name
 # -- TODO confirm which name is intended).
 xls_name = "mangaDB.xlsx"
 # Module-level list, empty at import time; not referenced by any active code
 # visible here. Presumably meant to collect metadata records -- TODO confirm.
 list_MetaInfo = []
 #
 def GetCurrentTime():
    """Return the current local time as a 'Y/M/D_h:m:s' string.

    The fields are formatted as plain integers, so they are not zero-padded
    (for example '2024/4/5_2:5:9').
    """
    # struct_time slices like a tuple; its first six fields are
    # year, month, day, hour, minute, second.
    year, month, day, hour, minute, second = time.localtime()[:6]
    return f"{year}/{month}/{day}_{hour}:{minute}:{second}"
 # #
 # def XLSWriteMangainfo(title, url, *tags):
 #     #    
 #     try:
 #         wb = load_workbook(xls_path)
 #         print("Open Successed")
 #     except FileNotFoundError:
 #         wb = Workbook()
 #         print("xls Created")
 #     ws = wb.active
 #     # time, title, url, tags (comma)
 #     ws['A1'] = "Modified Time"
 #     ws['B1'] = int(time.time())
 #     if 'list' not in wb.sheetnames:
 #         ws1 = wb.create_sheet(title='list')
 #         print('list sheet created')
 #     wb.save(xls_path)
 #     ws2 = wb['list']
 # # 폴더 경로
 # folder_path = '/media/gerd/test/hiyobi_temp/'
 # # 폴더 내의 파일 및 폴더 목록 가져오기
 # items = os.listdir(folder_path)
 # index = 2
 # # 파일 및 폴더 목록 출력
 # for item in items:
 #     pos = 'A' + str(index)
 #     ws2[pos] = item
 #     index += 1
 #     #print(item)
 # print(str(index) + " searched")
 # wb.save(xls_path)
 # wb.close()
--- a/main.py
+++ b/main.py
@@ -0,0 +1,11 @@
 import GetArc_Hitomi as gethitomi
 def main():
    """Run one sample search ("trouble sweets") through the Hitomi scraper."""
    gethitomi.MangaInfo().GetSearchResult("trouble sweets")
 # Script entry point: call main() only when this file is run directly,
 # not when it is imported as a module.
 if __name__ == '__main__':
    main()