first added
This commit is contained in:
118
GetArc_Ehentai.py
Normal file
118
GetArc_Ehentai.py
Normal file
@@ -0,0 +1,118 @@
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.common.exceptions import TimeoutException
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
class MangaMetaInfo:
    """Lightweight record describing one gallery search result.

    Holds the gallery title, its page URL, the language label, the
    manga/doujin type label, and any number of trailing tag strings
    (collected into a tuple via *tags).
    """

    def __init__(self, title, url, lang, manType, *tags):
        # Pair up the simple fields, then keep the variadic tags as-is.
        self.title, self.url = title, url
        self.lang, self.manType = lang, manType
        self.tags = tags
|
||||

# Metadata fields to capture per gallery entry:
#   series, type, languages, tags

class MangaInfo:
    """Scrape a gallery search-result page with Selenium and print the
    parsed metadata (title, URL, artists, tag table) for each gallery
    block found via BeautifulSoup.
    """

    # NOTE(review): class-level mutable attribute — shared by ALL
    # instances. It is never written by the visible code; kept only for
    # backward compatibility with any external reader.
    listResult = []

    def GetSearchResult(self, searchWord):
        """Open the search page for *searchWord*, wait for results to
        render, then parse and print the gallery metadata.

        Returns None. Prints a Korean error message and aborts if the
        result elements never appear within 30 seconds.
        """
        url = self.getSiteUrl() + searchWord
        driver = webdriver.Chrome()
        try:
            driver.get(url)

            # Wait until a result title element (class 'lillie') exists.
            # NOTE(review): the selectors used here ('lillie',
            # 'gallery-content', 'dj') do not look like e-hentai markup
            # even though the base URL targets e-hentai — confirm which
            # site this is really meant to scrape.
            try:
                WebDriverWait(driver, 30).until(
                    EC.presence_of_element_located((By.CLASS_NAME, 'lillie'))
                )
            except TimeoutException:
                print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
                return

            strContent = driver.page_source
        finally:
            # Always release the browser: the original code leaked the
            # driver if get()/page_source raised anything unexpected.
            driver.quit()

        self.parseMangaInfos(strContent)

    def getSiteUrl(self):
        """Return the base search URL that the query word is appended to."""
        return "https://e-hentai.org/search.html?"

    def parseMangaInfos(self, html_doc):
        """Parse the page HTML and print metadata for every gallery block."""
        soup = BeautifulSoup(html_doc, 'html.parser')
        for element in soup.find_all(class_='gallery-content'):
            self.djParse(element)

    def djParse(self, soup_element):
        """Print title, artist list, and description table for each
        '.dj' child of *soup_element*, separated by a blank line."""
        for child in soup_element.find_all(class_='dj'):
            self.djtitleParse(child)
            self.artistlistParse(child)
            self.djDescParse(child)
            print("\r\n")

    def djtitleParse(self, soup_element):
        """Print the gallery title and its link URL from 'h1.lillie'."""
        element = soup_element.find('h1', class_='lillie')
        if element is None:
            # Markup mismatch — original crashed with AttributeError here.
            return
        title = element.text

        a_tag = element.find('a')
        a_url = a_tag.get('href') if a_tag is not None else ""

        print("title : " + title)
        print("URL : " + a_url)  # label typo fixed: was "URl : "

    def artistlistParse(self, soup_element):
        """Print each artist name and link found in 'div.artist-list'."""
        element = soup_element.find('div', class_='artist-list')

        print("artists")

        if element is None:
            # Markup mismatch — original crashed with AttributeError here.
            return
        for tag in element.find_all('a'):
            artist = tag.text
            a_url = tag.get('href')
            print(" " + artist + " " + a_url)

    def djDescParse(self, soup_element):
        """Print every row of the 'table.dj-desc' metadata table as a
        'label :' line followed by the linked tag names and URLs."""
        element = soup_element.find('table', class_='dj-desc')
        if element is None:
            # Markup mismatch — original crashed with AttributeError here.
            return
        for row in element.find_all('tr'):
            tds = row.find_all('td')
            # Expect exactly [label, links] cells; skip malformed rows.
            if 2 != len(tds):
                print("td get failed")
                continue

            print(tds[0].text + " : ")

            for tag in tds[1].find_all('a'):
                tag_name = tag.text
                tag_url = tag.get('href')
                print(" " + tag_name + " " + tag_url)
|
||||
|
||||
Reference in New Issue
Block a user