utility class added

자료 저장 구조 고민중...
2024-04-08 01:33:15 +09:00
parent 809748a73a
commit cc02151899
6 changed files with 217 additions and 61 deletions
--- a/DataClass.py
+++ b/DataClass.py
@@ -1,13 +1,48 @@
 class MangaMetaInfo:
-    def __init__(self, title, url, lang, manType, *tags):
-        self.title = title
-        self.url = url
-        self.lang = lang
-        self.manType = manType
-        self.tags = tags
-        pass
+    title = ""
+    url = ""
+    Series = ""
+    artists = []
+    Tags = []
+    fileurl = ""
+    galID = ""
    
-            #series
-            #type
-            #languages
-            #tags
+    def AddArtist(self, strArtist):
+        self.artists.append(strArtist)
+        
+    def AddTag(self, strTag):
+        self.tags.append(strTag)        
+    
+class SeriesInfo:
+    strSeries = ""
+    strUrl = ""
+    
+    def __init__(self, series, url):
+        self.strSeries = series
+        self.strUrl = url
+    
+    
+class TypeInfo:
+    strTyep = ""
+    strUrl = ""
+    
+    def __init__(self, strType, url):
+        self.strTyep = strType
+        self.strUrl = url
+
+class ArtistInfo:
+    strArtist = ""
+    strUrl = ""
+    
+    def __init__(self, artist, url):
+        self.strArtist = artist
+        self.strUrl = url
+    
+class TagIngo:
+    strTag = ""
+    strUrl = ""
+    
+    def __init__(self, tag, url):
+        self.strTag = tag
+        self.strUrl = url
+        
--- a/GetArc_Ehentai.py
+++ b/GetArc_Ehentai.py
@@ -5,13 +5,17 @@ from selenium.webdriver.support import expected_conditions as EC
 from selenium.common.exceptions import TimeoutException
 from bs4 import BeautifulSoup

+import UtilPack as util
+

 listResult = []
    
+#
 def GetSearchResult(searchWord):
-    print("E-hentai start")
-    #url = getSiteUrl() + searchWord
-    url = "https://e-hentai.org/"
+    url = getSiteUrl(searchWord)
+    
+    util.DbgOut("EHentai : " + url)
+    
    driver = webdriver.Chrome()
    driver.get(url)
    
@@ -21,7 +25,7 @@ def GetSearchResult(searchWord):
            EC.presence_of_element_located((By.CLASS_NAME, 'dp'))
        )
    except TimeoutException:
-        print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
+        util.DbgOut("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
        driver.quit()
        return
        
@@ -33,8 +37,10 @@ def GetSearchResult(searchWord):
    pass
    
 #
-def getSiteUrl():
-    strRet = "https://e-hentai.org/?f_search="
+def getSiteUrl(searchWord):
+    strRet = "https://e-hentai.org/"
+    if False == util.IsEmptyStr(searchWord):
+        strRet = strRet + "?f_search=" + searchWord

    return strRet
    
@@ -46,7 +52,6 @@ def parseMangaInfos(html_doc):
    gallery_table = soup.find('table', class_='itg gltc')
    gls = gallery_table.find_all('tr')
    
-    idx = 1;
    for gl in gls:
        gl1cParse(gl)
        gl2cParse(gl)
@@ -106,10 +111,5 @@ def gl3cParse(soup_element):
        print("       " + man_tag)
        man_tags.append(man_tag)
    
-    print(len(man_tags))
-        
-
-   
-        
    pass

--- a/GetArc_Hitomi.py
+++ b/GetArc_Hitomi.py
@@ -5,26 +5,15 @@ from selenium.webdriver.support import expected_conditions as EC
 from selenium.common.exceptions import TimeoutException
 from bs4 import BeautifulSoup

-
-class MangaMetaInfo:
-    def __init__(self, title, url, lang, manType, *tags):
-        self.title = title
-        self.url = url
-        self.lang = lang
-        self.manType = manType
-        self.tags = tags
-        pass
-    
-            #series
-            #type
-            #languages
-            #tags
-
+import UtilPack as util

 listResult = []
    
 def GetSearchResult(searchWord):
-    url = getSiteUrl() + searchWord
+    url = getSiteUrl(searchWord)
+    
+    util.DbgOut("Hitomi : " + url)
+    
    driver = webdriver.Chrome()
    driver.get(url)
    
@@ -34,7 +23,7 @@ def GetSearchResult(searchWord):
            EC.presence_of_element_located((By.CLASS_NAME, 'lillie'))
        )
    except TimeoutException:
-        print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
+        util.DbgOut("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
        driver.quit()
        return
        
@@ -46,8 +35,11 @@ def GetSearchResult(searchWord):
    pass
    
 #
-def getSiteUrl():
-    strRet = "https://hitomi.la/search.html?"
+def getSiteUrl(searchWord):
+    strRet = "https://hitomi.la/"
+    
+    if False == util.IsEmptyStr(searchWord):
+        strRet = strRet + "search.html?" + searchWord

    return strRet
    
--- a/StoreXLS.py
+++ b/StoreXLS.py
@@ -1,30 +1,120 @@
 import os
-import time
-from openpyxl import Workbook
-from openpyxl import load_workbook
-from openpyxl.utils import get_column_letter
+from openpyxl import opyxl
+from openpyxl.utils import xlsUtils
+
+import DataClass as stManga
+import UtilPack as util


 xls_name = "mangaDB.xlsx"
-list_MetaInfo = []
+m_wb = None

+sheetName_Mangainfo = "MangaInfo" 
+sheetName_Artists = "Artists"
+sheetName_Tags = "Tags"
+
+strMngSht = "MngInfo"
+strArtSht = "ArtInfo"
+strTagSht = "TagInfo"

 #
-def GetCurrentTime():
-    # 현재 시간을 구하고 구조체로 변환
-    current_time_struct = time.localtime()
-
-    # 구조체에서 연, 월, 일, 시간, 분, 초를 추출
-    year = current_time_struct.tm_year
-    month = current_time_struct.tm_mon
-    day = current_time_struct.tm_mday
-    hour = current_time_struct.tm_hour
-    minute = current_time_struct.tm_min
-    second = current_time_struct.tm_sec
+def DBXLSOpen(path):
+    xls_path = GetXLSPath(path)
+    util.DbgOut(xls_path)
    
-    strRet = (f"{year}/{month}/{day}_{hour}:{minute}:{second}")
+    try:
+        wb = opyxl(xls_path)
+        util.DbgOut("xls Open Successed")
+    except FileNotFoundError:
+        wb = opyxl()
+        util.DbgOut("xls Created")
+      
+    if wb is None:
+        util.DbgOut("XLS Open Something Wrong...")
+        return
+    
+    m_wb = wb
+    
+    ws = wb.active
+    # time, title, url, tags (comma)
+    ws['A1'] = "Modified Time"
+    ws['B1'] = int(time.time())
+    
+    
+    if 'list' not in wb.sheetnames:
+        ws1 = wb.create_sheet(title='list')
+        print('list sheet created')
+    
+    wb.save(xls_path)
+    
+    ws2 = wb['list']
+    print(str(index) + " searched")
+    
+    
+def DBXLSClose():
+    if m_wb is None:
+        return
+    
+    m_wb.save(xls_path)
+    m_wb.close()
+    
+    m_wb = None
+    
+    
+#
+def WriteMangaInfos(*listInfos):
+    if False == isinstance(listInfos, list):    
+        return
+    
+    ws_mng = getSheet(strMngSht)
+    if None == ws_mng:
+        return
+    
+    #for item in listInfos:
+        # 클래스 타입을 확인해야 하지만.. 생략.
+        # title, url, artist, group, series(parody), type, tags, hitomi ID, hitomi file, eh ID, eh tor
+        
+def AddTagInfo(tagInfo):
+    pass

-    return strRet 
+def AddTagInfo(strTag, strUrl):
+    pass
+
+def AddArtistInfo(artistInfo):
+    pass
+
+def AddArAddArtistInfo(strArtist, strUrl):
+    pass
+
+def AddSeriesInfo(SeriesInfo):
+    pass
+
+def AddSeriesInfo(strSerires, strUrl):
+    pass
+
+def AddTypeInfo(typeInfo):
+    pass
+
+def AddTypeInfo(strType, strUrl):
+    pass
+
+        
+def getSheet(sheetName):
+    if None == m_wb:
+        return
+    
+    if sheetName in m_wb.sheetnames:
+        return m_wb[sheetName]
+    
+    return m_wb.create_sheet(title=sheetName)
+
+#
+def GetXLSPath(path):
+    retPath = path
+    if False == os.path.exists(path):
+        retPath = os.path.abspath(__file__)
+    
+    return retPath + xls_name


 # #
--- a/UtilPack.py
+++ b/UtilPack.py
@@ -0,0 +1,39 @@
+import os
+import time
+
+m_dbgLevel = 0
+listDbgStr = []
+
+#
+def IsEmptyStr(string):
+    return 0 == len(string.strip())
+
+#
+def GetCurrentTime():
+    # 현재 시간을 구하고 구조체로 변환
+    current_time_struct = time.localtime()
+
+    # 구조체에서 연, 월, 일, 시간, 분, 초를 추출
+    year = current_time_struct.tm_year
+    month = current_time_struct.tm_mon
+    day = current_time_struct.tm_mday
+    hour = current_time_struct.tm_hour
+    minute = current_time_struct.tm_min
+    second = current_time_struct.tm_sec
+    
+    strRet = (f"{year}/{month}/{day}_{hour}:{minute}:{second}")
+
+    return strRet 
+
+#for debug
+def DbgOut(str):
+    strMsg = GetCurrentTime() +" : " + str
+    
+    listDbgStr.append(strMsg)
+    print(strMsg)
+    
+def printDbgMessages():
+    for line in listDbgStr:
+        print(line)
+
+    
--- a/main.py
+++ b/main.py
@@ -2,7 +2,7 @@ import GetArc_Hitomi as getHitomi
 import GetArc_Ehentai as getEhentai

 def main():
-    #getHitomi.GetSearchResult("trouble sweets")
+    getHitomi.GetSearchResult("trouble sweets")
    getEhentai.GetSearchResult("artist%3A%22kotomi+yo-ji%24%22")
    
    #artist:"kotomi yo-ji$"