Update .gitignore, DataClass.py, and 19 more files...

오랜만에 서버 정리하고 커밋. 파일 위치를 정리했다. 캘리버 DB 를 열고 정보를 열람. Pupil 을 통해 다운받은 정보를 관리하기 위해 새로운 클래스 추가
2025-08-01 14:57:40 +09:00
parent f1345e2770
commit d5f2d82bc9
18 changed files with 1266 additions and 1079 deletions
--- a/GetArc_Hitomi.py
+++ b/GetArc_Hitomi.py
@@ -6,180 +6,209 @@ from selenium.common.exceptions import TimeoutException
 from bs4 import BeautifulSoup

 import time
-
 import UtilPack as util
 import DataClass as info

-
 #
-def GetSearchResult(searchWord):
-    url = getSiteUrl(searchWord)
+class GetArc_Hitomi:
+    # Root of the site; getSiteUrlForSearch/getSiteUrlForGallery append a page path to it.
+    m_strBaseURL = "https://hitomi.la/"
+    # NOTE(review): class-level list, shared by all instances; nothing in this hunk reads it.
+    m_listTagsTemp = list[info.TagInfo]()
+    def __init__(self):
+        pass
+
+    def GetSearchResult(self, strWord: str, bSaveHTML: bool = False):
+        if util.IsEmptyStr(strWord):
+            util.DbgOut("Error : SearchWord is empty", True)
+            return
    
-    util.DbgOut("Hitomi : " + url, True)
-
-    driver = webdriver.Chrome()
-    driver.get(url)
-    
-    # 웹페이지가 로드될 때까지 기다리기
-    try:
-        WebDriverWait(driver, 10).until(
-            #EC.presence_of_element_located((By.CLASS_NAME, 'lillie'))
-            lambda d: d.execute_script("return document.readyState") == "complete"
-        )
-    except TimeoutException:
-        util.DbgOut("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.", True)
-        driver.quit()
-        return
-        
-    strContent = driver.page_source
-    
-    listRet = parseMangaInfos(strContent)
-    
-    for Idx in range(len(listRet)):
-        print(f"{Idx} : {listRet[Idx]}")
-
-    driver.quit()
-
-def GetListSearchResult(list_ID):
-    driver = webdriver.Chrome()  
-
-    # 웹페이지가 로드될 때까지 기다리기
-    try:
-        for id in list_ID:
-            url = getSiteUrl(id)
-            util.DbgOut("Hitomi : " + url, True)
-
-            driver.get(url)
-
-            WebDriverWait(driver, 10).until(
-            lambda d: d.execute_script("return document.readyState") == "complete"
-            )
-
-            time.sleep(2)
-
-            strContent = driver.page_source
-            listRet = parseMangaInfos(strContent)
-    
-            #for Idx in range(len(listRet)):
-            #    print(f"{Idx} : {listRet[Idx]}")
-
-            try:                
-                for Idx in range(len(listRet)):
-                    print(f"{Idx} : {listRet[Idx]}")
-                    with open( f"{id}.txt", 'w') as file:
-                        for item in listRet[Idx]:
-                            file.write( + "\n")
-                    file.close()
-            except IOError:
-                util.DbgOut(f"Error: Could not write to the file at {id}.txt.", True)
-
-    except Exception as e:
-        util.DbgOut("Hitomi Loading Error : ", e)
-    finally:
-        driver.quit()
-
-
-def getSiteUrl(searchWord):
-    strRet = "https://hitomi.la/"
-    
-    if False == searchWord.isdigit():
-        strRet = f"{strRet}search.html?{searchWord}"
-    else:
-        strRet = f"{strRet}galleries/{searchWord}.html"
-
-    return strRet
-    
-#
-def parseMangaInfos(html_doc):
-    # BeautifulSoup 객체 생성
-    soup = BeautifulSoup(html_doc, 'html.parser')
-    gallery_elements = soup.find_all(class_='gallery-content')
-    
-    listDJs = []
-    for element in gallery_elements:
-        listDJ = djParse(element)
-        listDJs.extend(listDJ)
-    
-    return listDJs
-
-
-def djParse(soup_element):
-    childs = soup_element.find_all(class_='dj')
-    
-    listInfos = []
-    for child in childs:
-        info = djTitleParse(child)
-        
-        listTag1 = djArtistParse(child, info)
-        listTag2 = djDescParse(child, info)
-        
-        listInfos.append(info)
-        
-    return listInfos
-
-
-def djTitleParse(input_element):
-    element = input_element.find('h1', class_='lillie')
-    title = element.text
-
-    a_tag = element.find('a')
-    url = a_tag.get('href')
-    
-    #util.DbgOut("title : " + title)
-    #util.DbgOut("URl : " + url)
-    
-    return info.CBZInfo(title, url)
-
-
-def djArtistParse(input_element, retPtr):
-    element = input_element.find('div', class_='artist-list')
-    
-    a_tags = element.find_all('a')
-    listArtists = []
-    for tag in a_tags:
-        artist = tag.text
-        a_url = tag.get('href')
-        retPtr.AddArtist(artist)
-        listArtists.append( info.TagInfo(artist, a_url) )
-        
-    return listArtists
-
-
-def djDescParse(input_element, retPtr):
-    element = input_element.find('table', class_='dj-desc')
-    tb_rows = element.find_all('tr')
-    listTags = []
-    for row in tb_rows:
-        tds = row.find_all('td')
-        if 2 != len(tds):
-            util.DbgOut("Warning : td get failed")
-            continue
-        
-        
-        outMsg = f"{tds[0].text} : \r\n"
-        
-        a_tags = tds[1].find_all('a')
-        for tag in a_tags:
-            tag_name = tag.text
-            tag_url = tag.get('href')
-            
-            retPtr.AddTag(tag_name)
-            
-            listTags.append(info.TagInfo(tag_name, tag_url))
-            
-            outMsg += f"        {tag_name} {tag_url}\r\n"
-        
-        #util.DbgOut(outMsg)
-        
-        #
-        if "Series" == tds[0]:
-            retPtr.serires = listTags[-1].name
-        elif "Type" == tds[0]:
-            retPtr.type = listTags[-1].name
-        elif "Language" == tds[0]:
-            retPtr.language = listTags[-1].name
+        strURL = ""
+        if strWord.isdigit():
+            strURL = self.getSiteUrlForGallery(int(strWord))
        else:
-            pass
-    
-    return listTags
+            strURL = self.getSiteUrlForSearch(strWord)
+        
+        util.DbgOut(f"Hitomi : {strURL}", True)

+        driver = webdriver.Chrome()
+        # try/finally so the browser is closed even when get() or the wait raises.
+        try:
+            driver.get(strURL)
+            # Wait until the document reports that it has finished loading.
+            WebDriverWait(driver, 10).until(
+                lambda d: d.execute_script("return document.readyState") == "complete"
+            )
+            strContent = driver.page_source
+        except TimeoutException:
+            util.DbgOut("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.", True)
+            return
+        finally:
+            driver.quit()
+
+        # Optionally dump the raw page to "<strWord>_result.html" in the working directory.
+        if bSaveHTML:
+            strFileName = f"{strWord}_result.html"
+            with open(strFileName, "w", encoding="utf-8") as file:
+                file.write(strContent)
+
+            util.DbgOut(f"HTML content saved to {strFileName}", True)
+
+        listRet = self.parseMangaInfos(strContent)
+        for nIdx, item in enumerate(listRet):
+            util.DbgOut(f"{nIdx} : {item}", True)
+        # Return the parsed entries (the timeout path above still returns None).
+        return listRet
+
+    #
+    def GetListSearchResult(self, listID: list[int], bSave: bool = False):
+        """Load and parse the gallery page of every ID in listID, printing each entry.
+
+        When bSave is true, a gallery's entries are also written to "<ID>.txt" (UTF-8).
+        """
+        driver = webdriver.Chrome()
+
+        try:
+            for nID in listID:
+                strURL = self.getSiteUrlForGallery(nID)
+                util.DbgOut(f"Hitomi : {strURL}", True)
+                driver.get(strURL)
+                # Wait until the document reports that it has finished loading.
+                WebDriverWait(driver, 10).until(
+                    lambda d: d.execute_script("return document.readyState") == "complete"
+                )
+                # Fixed 2 s pause; presumably lets scripted content render - TODO confirm.
+                time.sleep(2)
+
+                listRet = self.parseMangaInfos(driver.page_source)
+                listLines = [f"{nIdx} : {item}" for nIdx, item in enumerate(listRet)]
+                for strLine in listLines:
+                    print(strLine)
+                if not bSave:
+                    continue
+                strFileName = f"{nID}.txt"
+                try:
+                    # Open once per gallery so later entries do not overwrite earlier ones.
+                    with open(strFileName, "w", encoding="utf-8") as file:
+                        file.writelines(f"{strLine}\n" for strLine in listLines)
+                except IOError:
+                    util.DbgOut(f"Error: Could not write to the file at {strFileName}.", True)
+        except Exception as e:
+            util.DbgOut(f"Hitomi Loading Error : {e}", True)
+        finally:
+            driver.quit()
+
+
+    def getSiteUrlForSearch(self, searchWord: str) -> str:
+        """Return m_strBaseURL + "search.html?" + searchWord (the word is inserted as-is)."""
+        return f"{self.m_strBaseURL}search.html?{searchWord}"
+    def getSiteUrlForGallery(self, nHitomiID: int) -> str:
+        """Return m_strBaseURL + "galleries/<nHitomiID>.html"."""
+        return f"{self.m_strBaseURL}galleries/{nHitomiID}.html"
+    #
+    def parseMangaInfos(self, html_doc : str) -> list[info.CBZInfo]:
+        """Parse html_doc and return the entries found under every 'gallery-content' element."""
+        # Build the BeautifulSoup tree using the 'html.parser' backend.
+        parsed_doc = BeautifulSoup(html_doc, 'html.parser')
+
+        listFound: list[info.CBZInfo] = []
+        # Each container may hold several entries; flatten them into one list.
+        for container in parsed_doc.find_all(class_='gallery-content'):
+            listFound.extend(self.djParse(container))
+
+        return listFound
+
+    #
+    def djParse(self, soup_element) -> list[info.CBZInfo]:
+        """Build one entry per 'dj' element found under soup_element."""
+        listInfos: list[info.CBZInfo] = []
+
+        for dj_node in soup_element.find_all(class_='dj'):
+            # The title parser creates the entry; artists and tags are then added to it in place.
+            entry = self.djTitleParse(dj_node)
+            self.djArtistParse(dj_node, entry)
+            self.djDescParse(dj_node, entry)
+            listInfos.append(entry)
+
+        return listInfos
+
+    #
+    def djTitleParse(self, input_element):
+        """Create a CBZInfo from the entry's <h1 class="lillie"> heading.
+
+        The title is the heading's text; the URL is the href of the first
+        <a> inside the heading.
+        """
+        heading = input_element.find('h1', class_='lillie')
+        link = heading.find('a')
+
+        # NOTE(review): a missing heading or link raises AttributeError here.
+        return info.CBZInfo(heading.text, link.get('href'))
+
+    #
+    def djArtistParse(self, input_element, retPtr):
+        """Register the entry's artists on retPtr.
+
+        Calls retPtr.AddArtist with the text of each <a> inside the 'artist-list' div.
+        """
+        artist_box = input_element.find('div', class_='artist-list')
+        for link in artist_box.find_all('a'):
+            retPtr.AddArtist(link.text)
+
+    #
+    def djDescParse(self, input_element, retPtr):
+        """Read the 'dj-desc' table: add every tag to retPtr and return them as TagInfo.
+
+        Rows labelled Series, Type or Language also set the matching field on
+        retPtr from the last link in that row.
+        """
+        element = input_element.find('table', class_='dj-desc')
+        listTags = []
+        for row in element.find_all('tr'):
+            tds = row.find_all('td')
+            if 2 != len(tds):
+                util.DbgOut("Warning : td get failed")
+                continue
+
+            # Tags of this row only, so a row without links cannot pick up
+            # a value left over from an earlier row.
+            listRowTags = []
+            for tag in tds[1].find_all('a'):
+                retPtr.AddTag(tag.text)
+                listRowTags.append(info.TagInfo(tag.text, tag.get('href')))
+            listTags.extend(listRowTags)
+            if not listRowTags:
+                continue
+
+            # Compare the cell's text: a Tag object never equals a plain str,
+            # which left these branches dead before.
+            strLabel = tds[0].text.strip()
+            if "Series" == strLabel:
+                # NOTE(review): attribute spelling kept as-is; confirm against DataClass.
+                retPtr.serires = listRowTags[-1].name
+            elif "Type" == strLabel:
+                retPtr.type = listRowTags[-1].name
+            elif "Language" == strLabel:
+                retPtr.language = listRowTags[-1].name
+
+        return listTags
+
+
+
+def main():
+    """Manual test entry: runs GetSearchResult for "11107" with bSaveHTML=True."""
+    hitomi = GetArc_Hitomi()
+    
+    # Search by keyword
+    #hitomi.GetSearchResult("test")
+    
+    # Search by ID
+    hitomi.GetSearchResult("11107", True)
+    
+    # Search by a list of IDs
+    #listID = [1234567, 2345678, 3456789]
+    #hitomi.GetListSearchResult(listID, True)
+
+# Run main() only when this file is executed as a script, not when it is imported.
+if __name__ == '__main__':
+    main()