Update DataClass.py, GetArc_Ehentai.py, and 3 more files...

E-hentai page parsing implemented.
Added DataClass.
This commit is contained in:
2024-04-05 21:51:36 +09:00
parent dbe5377d6f
commit 809748a73a
5 changed files with 202 additions and 192 deletions

View File

@@ -21,98 +21,96 @@ class MangaMetaInfo:
#tags
# Container for manga search results scraped from the site.
class MangaInfo:
# NOTE(review): 'listResult' appears twice because the diff viewer renders
# both the removed and the added copy of the same line — only one belongs
# in the real file.
listResult = []
listResult = []
# NOTE(review): this is the pre-change (removed) method version of
# GetSearchResult as rendered by the diff viewer; its remaining body lines
# are scattered further down the diff, so this span is incomplete as shown.
def GetSearchResult(self, searchWord):
# Build the search URL and fetch the page with a Selenium Chrome driver.
url = self.getSiteUrl() + searchWord
driver = webdriver.Chrome()
driver.get(url)
# Wait until the page has loaded (comment translated from Korean)
try:
WebDriverWait(driver, 30).until(
EC.presence_of_element_located((By.CLASS_NAME, 'lillie'))
)
except TimeoutException:
print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
driver.quit()
return
# Capture the rendered HTML for parsing.
strContent = driver.page_source
# NOTE(review): post-change, module-level version of GetSearchResult as
# rendered by the diff viewer; lines from the removed method version are
# interleaved below — do not read this span as one coherent function.
def GetSearchResult(searchWord):
url = getSiteUrl() + searchWord
driver = webdriver.Chrome()
driver.get(url)
# Wait until the page has loaded (comment translated from Korean)
try:
WebDriverWait(driver, 30).until(
EC.presence_of_element_located((By.CLASS_NAME, 'lillie'))
)
except TimeoutException:
print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
driver.quit()
return
# The two lines below belong to the removed method version (diff residue).
self.parseMangaInfos(strContent)
pass
#
def getSiteUrl(self):
    """Return the base search URL of the target site."""
    return "https://hitomi.la/search.html?"
# Diff residue: tail of the added GetSearchResult (capture the page source,
# then shut the driver down).
strContent = driver.page_source
driver.quit()
#
def parseMangaInfos(self, html_doc):
    """Parse the search-result HTML and process each gallery container."""
    # Build the soup once, then hand every gallery container to djParse.
    soup = BeautifulSoup(html_doc, 'html.parser')
    for gallery in soup.find_all(class_='gallery-content'):
        self.djParse(gallery)
# Diff residue: final call of the added GetSearchResult.
parseMangaInfos(strContent)
def djParse(self, soup_element):
    """Parse every '.dj' entry found inside a gallery container."""
    # NOTE(review): indentation was lost in the diff rendering; the
    # separator print is assumed to follow each entry — confirm placement.
    for entry in soup_element.find_all(class_='dj'):
        self.djtitleParse(entry)
        self.artistlistParse(entry)
        self.djDescParse(entry)
        print("\r\n")
# NOTE(review): removed-version djtitleParse; its remaining body lines are
# interleaved further down in the diff, so this span is incomplete as shown.
def djtitleParse(self, soup_element):
element = soup_element.find('h1', class_='lillie')
title = element.text
pass
#
def getSiteUrl():
    """Return the base search URL of the target site.

    NOTE(review): the diff rendering interleaved lines of the removed
    djtitleParse method into this span; only the URL constant and the
    return statement belong to this function, reconstructed here.
    """
    strRet = "https://hitomi.la/search.html?"
    return strRet
#
def parseMangaInfos(html_doc):
    """Parse the search-result HTML and process each gallery container."""
    soup = BeautifulSoup(html_doc, 'html.parser')
    for gallery in soup.find_all(class_='gallery-content'):
        djParse(gallery)
# NOTE(review): this span interleaves the removed artistlistParse method
# with the added module-level djParse; neither definition is complete here
# and the lines must not be read as one coherent function.
def artistlistParse(self, soup_element):
element = soup_element.find('div', class_='artist-list')
def djParse(soup_element):
childs = soup_element.find_all(class_='dj')
for child in childs:
djtitleParse(child)
artistlistParse(child)
djDescParse(child)
print("artists")
print("\r\n")
a_tags = element.find_all('a')
def djtitleParse(soup_element):
    """Print the title and link URL of a single '.dj' entry."""
    heading = soup_element.find('h1', class_='lillie')
    link = heading.find('a')
    print("title : " + heading.text)
    print("URl : " + link.get('href'))
def artistlistParse(soup_element):
    """Print each artist name and link found in the '.artist-list' div."""
    artist_div = soup_element.find('div', class_='artist-list')
    print("artists")
    for anchor in artist_div.find_all('a'):
        print(" " + anchor.text + " " + anchor.get('href'))
def djDescParse(soup_element):
    """Print each metadata row (label plus linked tags) of the '.dj-desc' table.

    NOTE(review): the diff rendering fused removed-method lines into this
    function and pushed its inner-loop tail further down; the body below is
    reconstructed from the added lines of the diff.
    """
    element = soup_element.find('table', class_='dj-desc')
    tb_rows = element.find_all('tr')
    for row in tb_rows:
        tds = row.find_all('td')
        # Each metadata row is expected to have exactly a label cell and a value cell.
        if 2 != len(tds):
            print("td get failed")
            continue
        print(tds[0].text + " : ")
        a_tags = tds[1].find_all('a')
        for tag in a_tags:
            tag_name = tag.text
            tag_url = tag.get('href')
            print(" " + tag_name + " " + tag_url)
def djDescParse(self, soup_element):
    """Print each metadata row (label plus linked tags) of the '.dj-desc' table."""
    desc_table = soup_element.find('table', class_='dj-desc')
    for row in desc_table.find_all('tr'):
        cells = row.find_all('td')
        # A valid metadata row has exactly a label cell and a value cell.
        if len(cells) != 2:
            print("td get failed")
            continue
        print(cells[0].text + " : ")
        for anchor in cells[1].find_all('a'):
            print(" " + anchor.text + " " + anchor.get('href'))
# Diff residue: inner-loop tail of the added djDescParse (prints one
# linked tag name and URL per iteration).
tag_name = tag.text
tag_url = tag.get('href')
print(" " + tag_name + " " + tag_url)
pass