Update DataClass.py, GetArc_Ehentai.py, and 7 more files...

데이터 클래스 정의, 퍼필 다운로더 json 파서...
2024-05-13 21:48:40 +09:00
parent cc02151899
commit e9f8a7323c
9 changed files with 762 additions and 218 deletions
--- a/GetArc_Hitomi.py
+++ b/GetArc_Hitomi.py
@@ -6,9 +6,10 @@ from selenium.common.exceptions import TimeoutException
 from bs4 import BeautifulSoup

 import UtilPack as util
+import DataClass as info

-listResult = []
-    
+
+#
 def GetSearchResult(searchWord):
    url = getSiteUrl(searchWord)
    
@@ -39,70 +40,104 @@ def getSiteUrl(searchWord):
    strRet = "https://hitomi.la/"
    
    if False == util.IsEmptyStr(searchWord):
-        strRet = strRet + "search.html?" + searchWord
+        if False == searchWord.isdigit():
+            strRet = strRet + "search.html?" + searchWord
+        else:
+            strRet = strRet + "galleries/" + searchWord + ".html"

    return strRet
    
-
 #
 def parseMangaInfos(html_doc):
    # BeautifulSoup 객체 생성
    soup = BeautifulSoup(html_doc, 'html.parser')
    gallery_elements = soup.find_all(class_='gallery-content')
    
+    listDJs = []
    for element in gallery_elements:
-        djParse(element)
+        # djParse returns a list of entries per gallery block; merge them into one flat list.
+        listDJs.extend(djParse(element))
        
+    print(len(listDJs))
+

 def djParse(soup_element):
    childs = soup_element.find_all(class_='dj')
    
+    listInfos = []  # one parsed entry object per element with class 'dj'
    for child in childs:
-        djtitleParse(child)
-        artistlistParse(child)
-        djDescParse(child)
+        entry = djTitleParse(child)  # not named 'info': that would shadow the DataClass module alias
        
-        print("\r\n")
+        djArtistParse(child, entry)  # fills 'entry' in place; the returned tag list is not needed here
+        djDescParse(child, entry)  # same: adds tags and metadata to 'entry'
        
+        listInfos.append(entry)
        
-def djtitleParse(soup_element):
-    element = soup_element.find('h1', class_='lillie')
+    return listInfos
+
+
+def djTitleParse(input_element):  # builds the CBZInfo of one entry from its <h1 class="lillie"> heading
+    element = input_element.find('h1', class_='lillie')
    title = element.text

    a_tag = element.find('a')
-    a_url = a_tag.get('href')
-        
-    print("title : " + title)
-    print("URl : " + a_url)
+    page_url = a_tag.get('href')  # href of the heading's first link
    
-def artistlistParse(soup_element):
-    element = soup_element.find('div', class_='artist-list')
+    util.DbgOut("title : " + title)
+    util.DbgOut("URl : " + page_url)
    
-    print("artists")
+    return info.CBZInfo(title, page_url)  # 'info' is the DataClass module alias
+
+
+def djArtistParse(input_element, retPtr):  # registers each artist on retPtr and returns them as TagInfo objects
+    element = input_element.find('div', class_='artist-list')
    
    a_tags = element.find_all('a')
+    artist_tags = []  # one info.TagInfo(name, href) per artist link
    for tag in a_tags:
        artist = tag.text
        a_url = tag.get('href')
-        print("    " + artist + " " + a_url)
+        retPtr.AddArtist(artist)
+        artist_tags.append(info.TagInfo(artist, a_url))
        
+    return artist_tags

-def djDescParse(soup_element):
-    element = soup_element.find('table', class_='dj-desc')
+
+def djDescParse(input_element, retPtr):
+    """Collect the tags listed in one entry's 'dj-desc' table.
+    Each link in a row's second cell goes to retPtr.AddTag and into the returned
+    info.TagInfo list; 'Series'/'Type'/'Language' rows also set a retPtr field."""
+    element = input_element.find('table', class_='dj-desc')
    tb_rows = element.find_all('tr')
+    listTags = []
    for row in tb_rows:
        tds = row.find_all('td')
        if 2 != len(tds):
-            print("td get failed")
+            util.DbgOut("Warning : td get failed")
            continue
        
-        print(tds[0].text + " : ")
+        label = tds[0].text.strip()  # row caption, used below to recognise metadata rows
+        outMsg = f"{tds[0].text} : \r\n"
        
        a_tags = tds[1].find_all('a')
        for tag in a_tags:
            tag_name = tag.text
            tag_url = tag.get('href')
-            print("        " + tag_name + " " + tag_url)
+            retPtr.AddTag(tag_name)
+            listTags.append(info.TagInfo(tag_name, tag_url))
+            outMsg += f"        {tag_name} {tag_url}\r\n"
        
-    pass
+        util.DbgOut(outMsg)
+        if not a_tags:  # no link in this row: listTags[-1] would belong to an earlier row (or not exist)
+            continue
+        # Compare the caption text, not the cell itself: a bs4 Tag never equals a plain str.
+        # NOTE(review): 'serires' looks like a typo of 'series'; kept until checked against DataClass.CBZInfo.
+        if "Series" == label:
+            retPtr.serires = listTags[-1].name
+        elif "Type" == label:
+            retPtr.type = listTags[-1].name
+        elif "Language" == label:
+            retPtr.language = listTags[-1].name
+    
+    return listTags