From cc02151899ff4bc631c6d5325b2357eb94de6e73 Mon Sep 17 00:00:00 2001 From: Lee Young Hoon Date: Mon, 8 Apr 2024 01:33:15 +0900 Subject: [PATCH] =?UTF-8?q?utility=20class=20added=20=EC=9E=90=EB=A3=8C=20?= =?UTF-8?q?=EC=A0=80=EC=9E=A5=20=EA=B5=AC=EC=A1=B0=20=EA=B3=A0=EB=AF=BC?= =?UTF-8?q?=EC=A4=91...?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DataClass.py | 57 +++++++++++++++++---- GetArc_Ehentai.py | 24 ++++----- GetArc_Hitomi.py | 30 ++++------- StoreXLS.py | 126 +++++++++++++++++++++++++++++++++++++++------- UtilPack.py | 39 ++++++++++++++ main.py | 2 +- 6 files changed, 217 insertions(+), 61 deletions(-) create mode 100644 UtilPack.py diff --git a/DataClass.py b/DataClass.py index 8d589ca..7ef4a59 100644 --- a/DataClass.py +++ b/DataClass.py @@ -1,13 +1,48 @@ class MangaMetaInfo: - def __init__(self, title, url, lang, manType, *tags): - self.title = title - self.url = url - self.lang = lang - self.manType = manType - self.tags = tags - pass + title = "" + url = "" + Series = "" + artists = [] + Tags = [] + fileurl = "" + galID = "" - #series - #type - #languages - #tags \ No newline at end of file + def AddArtist(self, strArtist): + self.artists.append(strArtist) + + def AddTag(self, strTag): + self.tags.append(strTag) + +class SeriesInfo: + strSeries = "" + strUrl = "" + + def __init__(self, series, url): + self.strSeries = series + self.strUrl = url + + +class TypeInfo: + strTyep = "" + strUrl = "" + + def __init__(self, strType, url): + self.strTyep = strType + self.strUrl = url + +class ArtistInfo: + strArtist = "" + strUrl = "" + + def __init__(self, artist, url): + self.strArtist = artist + self.strUrl = url + +class TagIngo: + strTag = "" + strUrl = "" + + def __init__(self, tag, url): + self.strTag = tag + self.strUrl = url + \ No newline at end of file diff --git a/GetArc_Ehentai.py b/GetArc_Ehentai.py index 61e8337..d5de960 100644 --- a/GetArc_Ehentai.py +++ b/GetArc_Ehentai.py @@ -5,13 +5,17 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.common.exceptions import TimeoutException from bs4 import BeautifulSoup +import UtilPack as util + listResult = [] +# def GetSearchResult(searchWord): - print("E-hentai start") - #url = getSiteUrl() + searchWord - url = "https://e-hentai.org/" + url = getSiteUrl(searchWord) + + util.DbgOut("EHentai : " + url) + driver = webdriver.Chrome() driver.get(url) @@ -21,7 +25,7 @@ def GetSearchResult(searchWord): EC.presence_of_element_located((By.CLASS_NAME, 'dp')) ) except TimeoutException: - print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.") + util.DbgOut("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.") driver.quit() return @@ -33,8 +37,10 @@ def GetSearchResult(searchWord): pass # -def getSiteUrl(): - strRet = "https://e-hentai.org/?f_search=" +def getSiteUrl(searchWord): + strRet = "https://e-hentai.org/" + if False == util.IsEmptyStr(searchWord): + strRet = strRet + "?f_search=" + searchWord return strRet @@ -46,7 +52,6 @@ def parseMangaInfos(html_doc): gallery_table = soup.find('table', class_='itg gltc') gls = gallery_table.find_all('tr') - idx = 1; for gl in gls: gl1cParse(gl) gl2cParse(gl) @@ -106,10 +111,5 @@ def gl3cParse(soup_element): print(" " + man_tag) man_tags.append(man_tag) - print(len(man_tags)) - - - - pass diff --git a/GetArc_Hitomi.py b/GetArc_Hitomi.py index 4fa7387..099af7b 100644 --- a/GetArc_Hitomi.py +++ b/GetArc_Hitomi.py @@ -5,26 +5,15 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.common.exceptions import TimeoutException from bs4 import BeautifulSoup - -class MangaMetaInfo: - def __init__(self, title, url, lang, manType, *tags): - self.title = title - self.url = url - self.lang = lang - self.manType = manType - self.tags = tags - pass - - #series - #type - #languages - #tags - +import UtilPack as util listResult = [] def GetSearchResult(searchWord): - url = getSiteUrl() + searchWord + url = getSiteUrl(searchWord) + + util.DbgOut("Hitomi : " + url) + driver = webdriver.Chrome() driver.get(url) @@ -34,7 +23,7 @@ def GetSearchResult(searchWord): EC.presence_of_element_located((By.CLASS_NAME, 'lillie')) ) except TimeoutException: - print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.") + util.DbgOut("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.") driver.quit() return @@ -46,8 +35,11 @@ def GetSearchResult(searchWord): pass # -def getSiteUrl(): - strRet = "https://hitomi.la/search.html?" +def getSiteUrl(searchWord): + strRet = "https://hitomi.la/" + + if False == util.IsEmptyStr(searchWord): + strRet = strRet + "search.html?" + searchWord return strRet diff --git a/StoreXLS.py b/StoreXLS.py index d92d132..c5156d5 100644 --- a/StoreXLS.py +++ b/StoreXLS.py @@ -1,30 +1,120 @@ import os -import time -from openpyxl import Workbook -from openpyxl import load_workbook -from openpyxl.utils import get_column_letter +from openpyxl import opyxl +from openpyxl.utils import xlsUtils + +import DataClass as stManga +import UtilPack as util xls_name = "mangaDB.xlsx" -list_MetaInfo = [] +m_wb = None +sheetName_Mangainfo = "MangaInfo" +sheetName_Artists = "Artists" +sheetName_Tags = "Tags" + +strMngSht = "MngInfo" +strArtSht = "ArtInfo" +strTagSht = "TagInfo" # -def GetCurrentTime(): - # 현재 시간을 구하고 구조체로 변환 - current_time_struct = time.localtime() - - # 구조체에서 연, 월, 일, 시간, 분, 초를 추출 - year = current_time_struct.tm_year - month = current_time_struct.tm_mon - day = current_time_struct.tm_mday - hour = current_time_struct.tm_hour - minute = current_time_struct.tm_min - second = current_time_struct.tm_sec +def DBXLSOpen(path): + xls_path = GetXLSPath(path) + util.DbgOut(xls_path) - strRet = (f"{year}/{month}/{day}_{hour}:{minute}:{second}") + try: + wb = opyxl(xls_path) + util.DbgOut("xls Open Successed") + except FileNotFoundError: + wb = opyxl() + util.DbgOut("xls Created") + + if wb is None: + util.DbgOut("XLS Open Something Wrong...") + return + + m_wb = wb + + ws = wb.active + # time, title, url, tags (comma) + ws['A1'] = "Modified Time" + ws['B1'] = int(time.time()) + + + if 'list' not in wb.sheetnames: + ws1 = wb.create_sheet(title='list') + print('list sheet created') + + wb.save(xls_path) + + ws2 = wb['list'] + print(str(index) + " searched") + + +def DBXLSClose(): + if m_wb is None: + return + + m_wb.save(xls_path) + m_wb.close() + + m_wb = None + + +# +def WriteMangaInfos(*listInfos): + if False == isinstance(listInfos, list): + return + + ws_mng = getSheet(strMngSht) + if None == ws_mng: + return + + #for item in listInfos: + # 클래스 타잎을 확인해야 하지만만.. 생략. + # title, url, artist, group, series(parady), type, tags, hitomi ID, hitomi file, eh ID, eh tor + +def AddTagInfo(tagInfo): + pass - return strRet +def AddTagInfo(strTag, strUrl): + pass + +def AddArtistInfo(artistInfo): + pass + +def AddArAddArtistInfo(strArtist, strUrl): + pass + +def AddSeriesInfo(SeriesInfo): + pass + +def AddSeriesInfo(strSerires, strUrl): + pass + +def AddTypeInfo(typeInfo): + pass + +def AddTypeInfo(strType, strUrl): + pass + + +def getSheet(sheetName): + if None == m_wb: + return + + if sheetName in m_wb.sheetnames: + return m_wb[sheetName] + + return m_wb.create_sheet(title=sheetName) + +# +def GetXLSPath(path): + retPath = path + if False == os.path.exists(path): + retPath = os.path.abspath(__file__) + + return retPath + xls_name # # diff --git a/UtilPack.py b/UtilPack.py new file mode 100644 index 0000000..cc98d4e --- /dev/null +++ b/UtilPack.py @@ -0,0 +1,39 @@ +import os +import time + +m_dbgLevel = 0 +listDbgStr = [] + +# +def IsEmptyStr(string): + return 0 == len(string.strip()) + +# +def GetCurrentTime(): + # 현재 시간을 구하고 구조체로 변환 + current_time_struct = time.localtime() + + # 구조체에서 연, 월, 일, 시간, 분, 초를 추출 + year = current_time_struct.tm_year + month = current_time_struct.tm_mon + day = current_time_struct.tm_mday + hour = current_time_struct.tm_hour + minute = current_time_struct.tm_min + second = current_time_struct.tm_sec + + strRet = (f"{year}/{month}/{day}_{hour}:{minute}:{second}") + + return strRet + +#for debug +def DbgOut(str): + strMsg = GetCurrentTime() +" : " + str + + listDbgStr.append(strMsg) + print(strMsg) + +def printDbgMessages(): + for line in listDbgStr: + print(line) + + \ No newline at end of file diff --git a/main.py b/main.py index af11b9e..e55c808 100644 --- a/main.py +++ b/main.py @@ -2,7 +2,7 @@ import GetArc_Hitomi as getHitomi import GetArc_Ehentai as getEhentai def main(): - #getHitomi.GetSearchResult("trouble sweets") + getHitomi.GetSearchResult("trouble sweets") getEhentai.GetSearchResult("artist%3A%22kotomi+yo-ji%24%22") #artist:"kotomi yo-ji$"