utility class added

자료 저장 구조 고민중...
2024-04-08 01:33:15 +09:00
parent 809748a73a
commit cc02151899
6 changed files with 217 additions and 61 deletions
--- a/DataClass.py
+++ b/DataClass.py
@@ -1,13 +1,48 @@
 class MangaMetaInfo:
-    def __init__(self, title, url, lang, manType, *tags):
+    title = ""
-        self.title = title
+    url = ""
-        self.url = url
+    Series = ""
-        self.lang = lang
+    artists = []
-        self.manType = manType
+    Tags = []
-        self.tags = tags
+    fileurl = ""
-        pass
+    galID = ""
    def AddArtist(self, strArtist):
        """Append *strArtist* to the list held in ``self.artists``."""
        artist_list = self.artists
        artist_list.append(strArtist)
    def AddTag(self, strTag):
        """Append *strTag* to the list held in ``self.Tags``.

        The tag list on this class is declared as ``Tags`` (capital T);
        appending through ``self.tags`` had no matching attribute to use.
        """
        # NOTE(review): ``Tags`` is declared at class level, so the list is
        # shared between instances unless each instance rebinds it - confirm
        # whether that sharing is intended.
        self.Tags.append(strTag)
 class SeriesInfo:
    """Pairs a series name with its URL."""
    # Class-level defaults; __init__ sets same-named attributes per instance.
    strUrl = ""
    strSeries = ""
    def __init__(self, series, url):
        self.strSeries, self.strUrl = series, url
 class TypeInfo:
    """Pairs a type name with its URL."""
    # NOTE: the attribute is spelled ``strTyep``; the spelling is kept because
    # it is the name other code has to use to read the value.
    strUrl = ""
    strTyep = ""
    def __init__(self, strType, url):
        self.strTyep, self.strUrl = strType, url
 class ArtistInfo:
    """Pairs an artist name with its URL."""
    # Class-level defaults; __init__ sets same-named attributes per instance.
    strUrl = ""
    strArtist = ""
    def __init__(self, artist, url):
        self.strArtist, self.strUrl = artist, url
 class TagIngo:
    """Pairs a tag with its URL."""
    # NOTE(review): the class name looks like a typo of "TagInfo" - confirm
    # before renaming, since the name is what other code would reference.
    strUrl = ""
    strTag = ""
    def __init__(self, tag, url):
        self.strTag, self.strUrl = tag, url
            #series
            #type
            #languages
            #tags
--- a/GetArc_Ehentai.py
+++ b/GetArc_Ehentai.py
@@ -5,13 +5,17 @@ from selenium.webdriver.support import expected_conditions as EC
 from selenium.common.exceptions import TimeoutException
 from bs4 import BeautifulSoup
 import UtilPack as util
 listResult = []
 #
 def GetSearchResult(searchWord):
-    print("E-hentai start")
+    url = getSiteUrl(searchWord)
-    #url = getSiteUrl() + searchWord
+    
-    url = "https://e-hentai.org/"
+    util.DbgOut("EHentai : " + url)
    driver = webdriver.Chrome()
    driver.get(url)
@@ -21,7 +25,7 @@ def GetSearchResult(searchWord):
            EC.presence_of_element_located((By.CLASS_NAME, 'dp'))
        )
    except TimeoutException:
-        print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
+        util.DbgOut("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
        driver.quit()
        return
@@ -33,8 +37,10 @@ def GetSearchResult(searchWord):
    pass
 #
-def getSiteUrl():
+def getSiteUrl(searchWord):
-    strRet = "https://e-hentai.org/?f_search="
+    strRet = "https://e-hentai.org/"
    if False == util.IsEmptyStr(searchWord):
        strRet = strRet + "?f_search=" + searchWord
    return strRet
@@ -46,7 +52,6 @@ def parseMangaInfos(html_doc):
    gallery_table = soup.find('table', class_='itg gltc')
    gls = gallery_table.find_all('tr')
    idx = 1;
    for gl in gls:
        gl1cParse(gl)
        gl2cParse(gl)
@@ -106,10 +111,5 @@ def gl3cParse(soup_element):
        print("       " + man_tag)
        man_tags.append(man_tag)
    print(len(man_tags))
    pass
--- a/GetArc_Hitomi.py
+++ b/GetArc_Hitomi.py
@@ -5,26 +5,15 @@ from selenium.webdriver.support import expected_conditions as EC
 from selenium.common.exceptions import TimeoutException
 from bs4 import BeautifulSoup
-
+import UtilPack as util
 class MangaMetaInfo:
    """Metadata record for one manga entry."""
    def __init__(self, title, url, lang, manType, *tags):
        # Any extra positional arguments are collected into the ``tags`` tuple.
        self.title, self.url = title, url
        self.lang, self.manType = lang, manType
        self.tags = tags
            #series
            #type
            #languages
            #tags
 listResult = []
 def GetSearchResult(searchWord):
-    url = getSiteUrl() + searchWord
+    url = getSiteUrl(searchWord)
    util.DbgOut("Hitomi : " + url)
    driver = webdriver.Chrome()
    driver.get(url)
@@ -34,7 +23,7 @@ def GetSearchResult(searchWord):
            EC.presence_of_element_located((By.CLASS_NAME, 'lillie'))
        )
    except TimeoutException:
-        print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
+        util.DbgOut("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
        driver.quit()
        return
@@ -46,8 +35,11 @@ def GetSearchResult(searchWord):
    pass
 #
-def getSiteUrl():
+def getSiteUrl(searchWord):
-    strRet = "https://hitomi.la/search.html?"
+    strRet = "https://hitomi.la/"
    if False == util.IsEmptyStr(searchWord):
        strRet = strRet + "search.html?" + searchWord
    return strRet
--- a/StoreXLS.py
+++ b/StoreXLS.py
@@ -1,30 +1,120 @@
 import os
-import time
+from openpyxl import opyxl
-from openpyxl import Workbook
+from openpyxl.utils import xlsUtils
-from openpyxl import load_workbook
+
-from openpyxl.utils import get_column_letter
+import DataClass as stManga
 import UtilPack as util
 xls_name = "mangaDB.xlsx"
-list_MetaInfo = []
+m_wb = None
 sheetName_Mangainfo = "MangaInfo" 
 sheetName_Artists = "Artists"
 sheetName_Tags = "Tags"
 strMngSht = "MngInfo"
 strArtSht = "ArtInfo"
 strTagSht = "TagInfo"
 #
 # Path of the workbook currently held in m_wb. DBXLSOpen records it so that
 # DBXLSClose can save back to the same file.
 m_xls_path = None
 def DBXLSOpen(path):
    """Open the workbook located via GetXLSPath(path), creating it if missing.

    The workbook is published in the module-level ``m_wb`` and its path in
    ``m_xls_path``. Cell A1/B1 of the active sheet receive a "Modified Time"
    label and the current epoch seconds, a sheet named 'list' is created when
    absent, and the workbook is saved once before returning.

    Args:
        path: Directory hint passed on to GetXLSPath.

    Returns:
        None.
    """
    # Without this declaration the assignments below would only bind locals
    # and the module-level handle would stay None.
    global m_wb, m_xls_path
    # Imported here so the function does not depend on which names the
    # module-level import block provides.
    import time
    from openpyxl import Workbook, load_workbook
    xls_path = GetXLSPath(path)
    util.DbgOut(xls_path)
    try:
        wb = load_workbook(xls_path)
        util.DbgOut("xls Open Successed")
    except FileNotFoundError:
        # No file yet: start from an empty workbook; it is written by the
        # save() call at the end of this function.
        wb = Workbook()
        util.DbgOut("xls Created")
    m_wb = wb
    m_xls_path = xls_path
    ws = wb.active
    # time, title, url, tags (comma)
    ws['A1'] = "Modified Time"
    ws['B1'] = int(time.time())
    if 'list' not in wb.sheetnames:
        wb.create_sheet(title='list')
        print('list sheet created')
    wb.save(xls_path)
 def DBXLSClose():
    """Save and close the workbook opened by DBXLSOpen, then clear the handle.

    Does nothing when no workbook is open.
    """
    # m_wb is reassigned below, so it must be declared global; otherwise the
    # None check would read an unbound local.
    global m_wb, m_xls_path
    if m_wb is None:
        return
    if m_xls_path is not None:
        m_wb.save(m_xls_path)
    m_wb.close()
    m_wb = None
    m_xls_path = None
 #
-def GetCurrentTime():
+def WriteMangaInfos(*listInfos):
-    # 현재 시간을 구하고 구조체로 변환
+    if False == isinstance(listInfos, list):    
-    current_time_struct = time.localtime()
+        return
-    # 구조체에서 연, 월, 일, 시간, 분, 초를 추출
+    ws_mng = getSheet(strMngSht)
-    year = current_time_struct.tm_year
+    if None == ws_mng:
-    month = current_time_struct.tm_mon
+        return
    day = current_time_struct.tm_mday
    hour = current_time_struct.tm_hour
    minute = current_time_struct.tm_min
    second = current_time_struct.tm_sec
-    strRet = (f"{year}/{month}/{day}_{hour}:{minute}:{second}")
+    #for item in listInfos:
        # TODO: the class type of each item should be checked here; skipped for now.
        # title, url, artist, group, series (parody), type, tags, hitomi ID, hitomi file, eh ID, eh tor
-    return strRet 
+def AddTagInfo(tagInfo):
    pass
 def AddTagInfo(strTag, strUrl):
    """Placeholder, not implemented yet: ignores its arguments and returns None."""
    # NOTE: a one-argument AddTagInfo is defined just before this one; this
    # later definition is the one the name ends up bound to.
    return None
 def AddArtistInfo(artistInfo):
    """Placeholder, not implemented yet: ignores its argument and returns None."""
    return None
 def AddArAddArtistInfo(strArtist, strUrl):
    """Placeholder, not implemented yet: ignores its arguments and returns None."""
    # NOTE(review): the name looks like a typo of "AddArtistInfo" - confirm
    # the intended name before anything starts calling this.
    return None
 def AddSeriesInfo(SeriesInfo):
    """Placeholder, not implemented yet: ignores its argument and returns None."""
    # NOTE: a two-argument AddSeriesInfo is defined right after this one and
    # rebinds the name, so this version is not reachable by name afterwards.
    return None
 def AddSeriesInfo(strSerires, strUrl):
    """Placeholder, not implemented yet: ignores its arguments and returns None."""
    # NOTE: shares its name with the one-argument AddSeriesInfo defined just
    # above; being defined later, this is the binding that remains.
    return None
 def AddTypeInfo(typeInfo):
    """Placeholder, not implemented yet: ignores its argument and returns None."""
    # NOTE: a two-argument AddTypeInfo is defined right after this one and
    # rebinds the name, so this version is not reachable by name afterwards.
    return None
 def AddTypeInfo(strType, strUrl):
    """Placeholder, not implemented yet: ignores its arguments and returns None."""
    # NOTE: shares its name with the one-argument AddTypeInfo defined just
    # above; being defined later, this is the binding that remains.
    return None
 def getSheet(sheetName):
    """Return the worksheet called *sheetName* from the open workbook.

    The sheet is created when the workbook does not have one by that name.

    Args:
        sheetName: Title of the worksheet to fetch or create.

    Returns:
        The worksheet, or None when no workbook is open (``m_wb`` is None).
    """
    # Identity test: "no workbook" is exactly the None sentinel.
    if m_wb is None:
        return None
    if sheetName in m_wb.sheetnames:
        return m_wb[sheetName]
    return m_wb.create_sheet(title=sheetName)
 #
 def GetXLSPath(path):
    """Build the full path of the workbook file inside *path*.

    Args:
        path: Directory that should hold the workbook. When it is empty,
            None, or does not exist, the directory containing this module
            is used instead.

    Returns:
        The directory joined with ``xls_name``.
    """
    base_dir = path
    if not path or not os.path.exists(path):
        # Fall back to the folder this module lives in. abspath(__file__)
        # alone is the module file itself, which cannot act as a directory.
        base_dir = os.path.dirname(os.path.abspath(__file__))
    # join() inserts the separator that plain concatenation would omit.
    return os.path.join(base_dir, xls_name)
 # #
--- a/UtilPack.py
+++ b/UtilPack.py
@@ -0,0 +1,39 @@
 import os
 import time
 m_dbgLevel = 0
 listDbgStr = []
 #
 def IsEmptyStr(string):
    """Return True when *string* is None, empty, or whitespace only.

    Args:
        string: Text to examine; None is accepted and counts as empty.

    Returns:
        bool: True if nothing but whitespace remains, otherwise False.
    """
    if string is None:
        return True
    return not string.strip()
 #
 def GetCurrentTime():
    """Return the current local time as ``year/month/day_hour:minute:second``.

    The numeric fields are written as plain integers, without zero padding.
    """
    # Take the current time once, as a struct of calendar fields.
    now = time.localtime()
    # Build the date and clock halves from the struct's fields.
    date_part = f"{now.tm_year}/{now.tm_mon}/{now.tm_mday}"
    clock_part = f"{now.tm_hour}:{now.tm_min}:{now.tm_sec}"
    return f"{date_part}_{clock_part}"
 #for debug
 def DbgOut(str):
    """Prefix *str* with the current time, record it in listDbgStr, and print it."""
    # NOTE: the parameter name shadows the built-in ``str``; it is left as is
    # because it is part of the function's signature.
    stamped = " : ".join((GetCurrentTime(), str))
    listDbgStr.append(stamped)
    print(stamped)
 def printDbgMessages():
    """Print every message stored in listDbgStr, one per line, in stored order."""
    for message in listDbgStr:
        print(message)
--- a/main.py
+++ b/main.py
@@ -2,7 +2,7 @@ import GetArc_Hitomi as getHitomi
 import GetArc_Ehentai as getEhentai
 def main():
-    #getHitomi.GetSearchResult("trouble sweets")
+    getHitomi.GetSearchResult("trouble sweets")
    getEhentai.GetSearchResult("artist%3A%22kotomi+yo-ji%24%22")
    #artist:"kotomi yo-ji$"