first added

2024-04-05 02:25:55 +09:00
commit dbe5377d6f
4 changed files with 322 additions and 0 deletions
--- a/GetArc_Ehentai.py
+++ b/GetArc_Ehentai.py
@@ -0,0 +1,118 @@
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.common.exceptions import TimeoutException
+from bs4 import BeautifulSoup
+
+
+class MangaMetaInfo:
+    """Metadata of one gallery entry: title, URL, language, type and tags."""
+
+    def __init__(self, title, url, lang, manType, *tags):
+        # Any extra positional arguments are collected into the ``tags`` tuple.
+        self.title, self.url = title, url
+        self.lang, self.manType = lang, manType
+        self.tags = tags
+
+    # Author's field notes:
+    #   series, type,
+    #   languages,
+    #   tags
+
+
+class MangaInfo:
+    """Scrape a gallery search page with Selenium and print each entry.
+
+    NOTE(review): selectors are the same as in GetArc_Hitomi.py; confirm.
+    """
+
+    # Class-level list shared by every instance; no method here uses it.
+    listResult = []
+
+    def GetSearchResult(self, searchWord):
+        """Open the search page for ``searchWord`` in Chrome and print results.
+
+        Prints a message and returns early when no element with class
+        ``lillie`` appears within 30 seconds.
+        """
+        url = self.getSiteUrl() + searchWord
+        driver = webdriver.Chrome()
+        try:
+            driver.get(url)
+            try:
+                # Wait until an element with class ``lillie`` is present.
+                WebDriverWait(driver, 30).until(
+                    EC.presence_of_element_located((By.CLASS_NAME, 'lillie'))
+                )
+            except TimeoutException:
+                print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
+                return
+            strContent = driver.page_source
+        finally:
+            # Quit the browser on every path, including unexpected errors.
+            driver.quit()
+
+        self.parseMangaInfos(strContent)
+
+    def getSiteUrl(self):
+        """Return the search URL prefix that the search word is appended to."""
+        return "https://e-hentai.org/search.html?"
+
+    def parseMangaInfos(self, html_doc):
+        """Parse ``html_doc`` and process every ``gallery-content`` element."""
+        soup = BeautifulSoup(html_doc, 'html.parser')
+        for element in soup.find_all(class_='gallery-content'):
+            self.djParse(element)
+
+    def djParse(self, soup_element):
+        """Print title, artists and description of each ``dj`` child element."""
+        for child in soup_element.find_all(class_='dj'):
+            self.djtitleParse(child)
+            self.artistlistParse(child)
+            self.djDescParse(child)
+
+            print("\r\n")
+
+    def djtitleParse(self, soup_element):
+        """Print the title text and link of the entry's ``h1.lillie`` heading."""
+        element = soup_element.find('h1', class_='lillie')
+        if element is None:
+            # Entry without a title heading: skip it instead of raising.
+            return
+
+        a_tag = element.find('a')
+        a_url = a_tag.get('href', '') if a_tag is not None else ''
+
+        print("title : " + element.text)
+        print("URl : " + a_url)
+
+    def artistlistParse(self, soup_element):
+        """Print the name and link of every artist in ``div.artist-list``."""
+        element = soup_element.find('div', class_='artist-list')
+
+        print("artists")
+        if element is None:
+            return
+
+        for tag in element.find_all('a'):
+            print("    " + tag.text + " " + tag.get('href', ''))
+
+    def djDescParse(self, soup_element):
+        """Print each two-cell row of ``table.dj-desc`` with its links."""
+        element = soup_element.find('table', class_='dj-desc')
+        if element is None:
+            return
+
+        for row in element.find_all('tr'):
+            tds = row.find_all('td')
+            if len(tds) != 2:
+                print("td get failed")
+                continue
+
+            print(tds[0].text + " : ")
+
+            for tag in tds[1].find_all('a'):
+                # A missing href is printed as an empty string.
+                print("        " + tag.text + " " + tag.get('href', ''))
+
--- a/GetArc_Hitomi.py
+++ b/GetArc_Hitomi.py
@@ -0,0 +1,118 @@
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.common.exceptions import TimeoutException
+from bs4 import BeautifulSoup
+
+
+class MangaMetaInfo:
+    """Metadata of one gallery entry: title, URL, language, type and tags."""
+
+    def __init__(self, title, url, lang, manType, *tags):
+        # Any extra positional arguments are collected into the ``tags`` tuple.
+        self.title, self.url = title, url
+        self.lang, self.manType = lang, manType
+        self.tags = tags
+
+    # Author's field notes:
+    #   series, type,
+    #   languages,
+    #   tags
+
+
+class MangaInfo:
+    """Scrape a gallery search page with Selenium and print each entry.
+
+    Results are only printed to stdout; nothing is returned or stored.
+    """
+
+    # Class-level list shared by every instance; no method here uses it.
+    listResult = []
+
+    def GetSearchResult(self, searchWord):
+        """Open the search page for ``searchWord`` in Chrome and print results.
+
+        Prints a message and returns early when no element with class
+        ``lillie`` appears within 30 seconds.
+        """
+        url = self.getSiteUrl() + searchWord
+        driver = webdriver.Chrome()
+        try:
+            driver.get(url)
+            try:
+                # Wait until an element with class ``lillie`` is present.
+                WebDriverWait(driver, 30).until(
+                    EC.presence_of_element_located((By.CLASS_NAME, 'lillie'))
+                )
+            except TimeoutException:
+                print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
+                return
+            strContent = driver.page_source
+        finally:
+            # Quit the browser on every path, including unexpected errors.
+            driver.quit()
+
+        self.parseMangaInfos(strContent)
+
+    def getSiteUrl(self):
+        """Return the search URL prefix that the search word is appended to."""
+        return "https://hitomi.la/search.html?"
+
+    def parseMangaInfos(self, html_doc):
+        """Parse ``html_doc`` and process every ``gallery-content`` element."""
+        soup = BeautifulSoup(html_doc, 'html.parser')
+        for element in soup.find_all(class_='gallery-content'):
+            self.djParse(element)
+
+    def djParse(self, soup_element):
+        """Print title, artists and description of each ``dj`` child element."""
+        for child in soup_element.find_all(class_='dj'):
+            self.djtitleParse(child)
+            self.artistlistParse(child)
+            self.djDescParse(child)
+
+            print("\r\n")
+
+    def djtitleParse(self, soup_element):
+        """Print the title text and link of the entry's ``h1.lillie`` heading."""
+        element = soup_element.find('h1', class_='lillie')
+        if element is None:
+            # Entry without a title heading: skip it instead of raising.
+            return
+
+        a_tag = element.find('a')
+        a_url = a_tag.get('href', '') if a_tag is not None else ''
+
+        print("title : " + element.text)
+        print("URl : " + a_url)
+
+    def artistlistParse(self, soup_element):
+        """Print the name and link of every artist in ``div.artist-list``."""
+        element = soup_element.find('div', class_='artist-list')
+
+        print("artists")
+        if element is None:
+            return
+
+        for tag in element.find_all('a'):
+            print("    " + tag.text + " " + tag.get('href', ''))
+
+    def djDescParse(self, soup_element):
+        """Print each two-cell row of ``table.dj-desc`` with its links."""
+        element = soup_element.find('table', class_='dj-desc')
+        if element is None:
+            return
+
+        for row in element.find_all('tr'):
+            tds = row.find_all('td')
+            if len(tds) != 2:
+                print("td get failed")
+                continue
+
+            print(tds[0].text + " : ")
+
+            for tag in tds[1].find_all('a'):
+                # A missing href is printed as an empty string.
+                print("        " + tag.text + " " + tag.get('href', ''))
+
--- a/StoreXLS.py
+++ b/StoreXLS.py
@@ -0,0 +1,75 @@
+import os
+import time
+from openpyxl import Workbook
+from openpyxl import load_workbook
+from openpyxl.utils import get_column_letter
+
+
+# Presumably the Excel workbook file name; no live code in this file reads it.
+xls_name = "mangaDB.xlsx"
+# Empty at import; presumably meant to hold MangaMetaInfo records - TODO confirm.
+list_MetaInfo = []
+
+#
+def GetCurrentTime():
+    """Return the current local time as a ``Y/M/D_h:m:s`` string.
+
+    Fields are not zero-padded, e.g. ``2024/4/5_2:25:55``.
+    """
+    now = time.localtime()
+
+    date_part = "/".join(
+        str(field) for field in (now.tm_year, now.tm_mon, now.tm_mday)
+    )
+    clock_part = ":".join(
+        str(field) for field in (now.tm_hour, now.tm_min, now.tm_sec)
+    )
+
+    return f"{date_part}_{clock_part}"
+
+
+# #
+# def XLSWriteMangainfo(title, url, *tags):
+#     #    
+#     try:
+#         wb = load_workbook(xls_path)
+#         print("Open Successed")
+#     except FileNotFoundError:
+#         wb = Workbook()
+#         print("xls Created")
+       
+        
+#     ws = wb.active
+#     # time, title, url, tags (comma)
+#     ws['A1'] = "Modified Time"
+#     ws['B1'] = int(time.time())
+    
+    
+#     if 'list' not in wb.sheetnames:
+#         ws1 = wb.create_sheet(title='list')
+#         print('list sheet created')
+    
+#     wb.save(xls_path)
+    
+#     ws2 = wb['list']
+
+# # Folder path
+# folder_path = '/media/gerd/test/hiyobi_temp/'
+
+# # Get the list of files and folders inside the folder
+# items = os.listdir(folder_path)
+
+# index = 2
+# # Write out the list of files and folders
+# for item in items:
+#     pos = 'A' + str(index)
+#     ws2[pos] = item
+#     index += 1
+#     #print(item)
+
+# print(str(index) + " searched")
+# wb.save(xls_path)
+
+# wb.close()
+
+
--- a/main.py
+++ b/main.py
@@ -0,0 +1,11 @@
+import GetArc_Hitomi as gethitomi
+
+def main():
+    """Run a sample search through the Hitomi scraper."""
+    gethitomi.MangaInfo().GetSearchResult("trouble sweets")
+
+
+# Script entry point: call main() only when this file is run directly.
+if __name__ == '__main__':
+    main()
+