utility class added

자료 저장 구조 고민중...
This commit is contained in:
2024-04-08 01:33:15 +09:00
parent 809748a73a
commit cc02151899
6 changed files with 217 additions and 61 deletions

View File

@@ -1,13 +1,48 @@
class MangaMetaInfo:
    """Metadata record for a single manga entry.

    All constructor arguments are optional, so the class can be built either
    with positional values (``MangaMetaInfo(title, url, lang, manType, *tags)``)
    or empty (``MangaMetaInfo()``) and filled in afterwards.

    Attributes:
        title, url, lang, manType: values passed to the constructor.
        Series, fileurl, galID: string fields, empty until assigned.
        artists: per-instance list filled by ``AddArtist``.
        tags: per-instance list seeded from ``*tags`` and filled by ``AddTag``.
        Tags: alias of ``tags`` (same list object), kept for existing readers.
    """

    # Immutable class-level defaults; instances may overwrite them freely.
    title = ""
    url = ""
    Series = ""
    fileurl = ""
    galID = ""
    # Fields the author noted as still to be modelled: series, type, languages, tags

    def __init__(self, title="", url="", lang="", manType="", *tags):
        self.title = title
        self.url = url
        self.lang = lang
        self.manType = manType
        # Lists are created per instance: a list defined on the class body
        # would be shared (and mutated) by every instance.
        self.artists = []
        # *tags arrives as a tuple, which has no append(); store a list so
        # AddTag works.
        self.tags = list(tags)
        self.Tags = self.tags

    def AddArtist(self, strArtist):
        """Append ``strArtist`` to this entry's artist list."""
        self.artists.append(strArtist)

    def AddTag(self, strTag):
        """Append ``strTag`` to this entry's tag list."""
        self.tags.append(strTag)
class SeriesInfo:
    """Hold a series value together with its URL, exactly as passed in."""

    # Class-level fallbacks; __init__ overwrites both on every instance.
    strSeries: str = ""
    strUrl: str = ""

    def __init__(self, series, url):
        self.strSeries, self.strUrl = series, url
class TypeInfo:
    """Hold a type value together with its URL, exactly as passed in.

    The stored attribute is spelled ``strTyep`` (kept for existing code);
    ``strType`` is a correctly spelled alias that reads and writes the same
    value.
    """

    # Class-level fallbacks; __init__ overwrites both on every instance.
    strTyep = ""
    strUrl = ""

    def __init__(self, strType, url):
        self.strTyep = strType
        self.strUrl = url

    @property
    def strType(self):
        """Correctly spelled alias of ``strTyep``."""
        return self.strTyep

    @strType.setter
    def strType(self, value):
        self.strTyep = value
class ArtistInfo:
    """Hold an artist value together with its URL, exactly as passed in."""

    # Class-level fallbacks; __init__ overwrites both on every instance.
    strArtist: str = ""
    strUrl: str = ""

    def __init__(self, artist, url):
        self.strArtist, self.strUrl = artist, url
class TagIngo:
    """Hold a tag value together with its URL, exactly as passed in.

    The class name is kept as-is for existing references; ``TagInfo`` below
    is the correctly spelled alias for new code.
    """

    # Class-level fallbacks; __init__ overwrites both on every instance.
    strTag = ""
    strUrl = ""

    def __init__(self, tag, url):
        self.strTag = tag
        self.strUrl = url


# Correctly spelled alias; both names refer to the same class object.
TagInfo = TagIngo

View File

@@ -5,13 +5,17 @@ from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
import UtilPack as util
listResult = []
#
def GetSearchResult(searchWord):
print("E-hentai start")
#url = getSiteUrl() + searchWord
url = "https://e-hentai.org/"
url = getSiteUrl(searchWord)
util.DbgOut("EHentai : " + url)
driver = webdriver.Chrome()
driver.get(url)
@@ -21,7 +25,7 @@ def GetSearchResult(searchWord):
EC.presence_of_element_located((By.CLASS_NAME, 'dp'))
)
except TimeoutException:
print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
util.DbgOut("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
driver.quit()
return
@@ -33,8 +37,10 @@ def GetSearchResult(searchWord):
pass
#
def getSiteUrl(searchWord):
    """Build the e-hentai URL to load for ``searchWord``.

    Returns the bare site URL when ``util.IsEmptyStr(searchWord)`` is true;
    otherwise appends ``?f_search=`` followed by ``searchWord`` verbatim
    (no escaping is applied here).
    """
    baseUrl = "https://e-hentai.org/"
    if util.IsEmptyStr(searchWord):
        return baseUrl
    return baseUrl + "?f_search=" + searchWord
@@ -46,7 +52,6 @@ def parseMangaInfos(html_doc):
gallery_table = soup.find('table', class_='itg gltc')
gls = gallery_table.find_all('tr')
idx = 1;
for gl in gls:
gl1cParse(gl)
gl2cParse(gl)
@@ -106,10 +111,5 @@ def gl3cParse(soup_element):
print(" " + man_tag)
man_tags.append(man_tag)
print(len(man_tags))
pass

View File

@@ -5,26 +5,15 @@ from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
class MangaMetaInfo:
    """Plain record of the values handed to the constructor."""

    def __init__(self, title, url, lang, manType, *tags):
        # Extra positional arguments are kept as the tuple Python collects.
        self.title, self.url = title, url
        self.lang, self.manType = lang, manType
        self.tags = tags

    #series
    #type
    #languages
    #tags
import UtilPack as util
listResult = []
def GetSearchResult(searchWord):
url = getSiteUrl() + searchWord
url = getSiteUrl(searchWord)
util.DbgOut("Hitomi : " + url)
driver = webdriver.Chrome()
driver.get(url)
@@ -34,7 +23,7 @@ def GetSearchResult(searchWord):
EC.presence_of_element_located((By.CLASS_NAME, 'lillie'))
)
except TimeoutException:
print("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
util.DbgOut("페이지가 로드되지 않았거나 요소를 찾을 수 없습니다.")
driver.quit()
return
@@ -46,8 +35,11 @@ def GetSearchResult(searchWord):
pass
#
def getSiteUrl(searchWord):
    """Build the hitomi.la URL to load for ``searchWord``.

    Returns the bare site URL when ``util.IsEmptyStr(searchWord)`` is true;
    otherwise appends ``search.html?`` followed by ``searchWord`` verbatim
    (no escaping is applied here).
    """
    baseUrl = "https://hitomi.la/"
    if util.IsEmptyStr(searchWord):
        return baseUrl
    return baseUrl + "search.html?" + searchWord

View File

@@ -1,30 +1,120 @@
import os
import time
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
from openpyxl import opyxl
from openpyxl.utils import xlsUtils
import DataClass as stManga
import UtilPack as util
# File name that GetXLSPath appends to the directory it resolves.
xls_name = "mangaDB.xlsx"
# Not referenced by the code visible in this view.
list_MetaInfo = []
# Workbook handle; DBXLSClose and getSheet return early while it is None.
m_wb = None
# Sheet-name constants; not referenced by the code visible in this view.
sheetName_Mangainfo = "MangaInfo"
sheetName_Artists = "Artists"
sheetName_Tags = "Tags"
# strMngSht is the sheet name WriteMangaInfos passes to getSheet.
strMngSht = "MngInfo"
# strArtSht / strTagSht are not referenced by the code visible in this view.
strArtSht = "ArtInfo"
strTagSht = "TagInfo"
#
def GetCurrentTime():
# Reads the local time and splits it into separate date/time fields.
# NOTE(review): the lines that format and return these fields are not
# contiguous with this span in the view; confirm where this definition ends.
# Get the current time as a time struct.
current_time_struct = time.localtime()
# Extract year, month, day, hour, minute and second from the struct.
year = current_time_struct.tm_year
month = current_time_struct.tm_mon
day = current_time_struct.tm_mday
hour = current_time_struct.tm_hour
minute = current_time_struct.tm_min
second = current_time_struct.tm_sec
def DBXLSOpen(path):
# Resolves the workbook path with GetXLSPath, logs it, then tries to open the
# workbook and falls back to creating one when the file is not found.
xls_path = GetXLSPath(path)
util.DbgOut(xls_path)
# NOTE(review): year/month/day/hour/minute/second are not assigned anywhere in
# this function as shown; this line looks like it belongs to another
# definition. Confirm.
strRet = (f"{year}/{month}/{day}_{hour}:{minute}:{second}")
try:
# NOTE(review): `opyxl` comes from `from openpyxl import opyxl`; confirm that
# openpyxl really exports it (its documented entry points are load_workbook()
# and Workbook()).
wb = opyxl(xls_path)
util.DbgOut("xls Open Successed")
except FileNotFoundError:
wb = opyxl()
util.DbgOut("xls Created")
if wb is None:
util.DbgOut("XLS Open Something Wrong...")
return
# NOTE(review): there is no `global m_wb` here, so inside a function this
# assignment would bind a local name and the module-level m_wb read by
# DBXLSClose/getSheet would stay None. Confirm intent.
m_wb = wb
ws = wb.active
# time, title, url, tags (comma)
ws['A1'] = "Modified Time"
# Stores the current Unix time, truncated to whole seconds.
ws['B1'] = int(time.time())
# Creates the 'list' sheet on first use and saves the workbook right away.
if 'list' not in wb.sheetnames:
ws1 = wb.create_sheet(title='list')
print('list sheet created')
wb.save(xls_path)
ws2 = wb['list']
# NOTE(review): `index` is not defined in the code visible here. Confirm
# whether this line is still part of the function.
print(str(index) + " searched")
def DBXLSClose():
    """Save and close the workbook held in the module-level ``m_wb``.

    Does nothing when no workbook is open. After the call ``m_wb`` is
    ``None`` again; the workbook is closed and the handle cleared even when
    saving raises, and the exception then propagates to the caller.
    """
    # m_wb is rebound below, so it has to be declared global. Without the
    # declaration Python treats it as a local name and the None check itself
    # raises UnboundLocalError.
    global m_wb
    if m_wb is None:
        return
    try:
        # NOTE(review): xls_path is read as a module-level name; confirm it is
        # assigned at module scope before this function is called.
        m_wb.save(xls_path)
    finally:
        m_wb.close()
        m_wb = None
#
def WriteMangaInfos(*listInfos):
    """Prepare the manga-info sheet for writing the given entries.

    Accepts the entries either as separate arguments
    (``WriteMangaInfos(a, b)``) or as one list/tuple
    (``WriteMangaInfos([a, b])``). Returns ``None`` in every case; writing
    the rows is not implemented yet.
    """
    # Because of the * in the signature, listInfos is always a tuple, so an
    # isinstance(listInfos, list) guard rejects every call. Unwrap a single
    # list/tuple argument instead so both call styles work.
    if len(listInfos) == 1 and isinstance(listInfos[0], (list, tuple)):
        listInfos = listInfos[0]
    if not listInfos:
        return
    ws_mng = getSheet(strMngSht)
    if ws_mng is None:
        return
    # TODO: write one row per item in listInfos.
    # The class type of each item should be checked, but that is skipped for now.
    # Planned columns: title, url, artist, group, series(parady), type, tags,
    # hitomi ID, hitomi file, eh ID, eh tor
# Placeholder writers for tag / artist / series / type records; every body is
# still `pass`.
# NOTE(review): Python has no overloading. When two defs share a name, the
# later one replaces the earlier, so for AddTagInfo, AddSeriesInfo and
# AddTypeInfo only the two-argument variant remains callable.
def AddTagInfo(tagInfo):
pass
# NOTE(review): strRet is not assigned in this stub; this return looks like
# residue of another function. Confirm.
return strRet
def AddTagInfo(strTag, strUrl):
pass
def AddArtistInfo(artistInfo):
pass
# NOTE(review): the name looks like a typo for AddArtistInfo. Confirm before
# relying on it.
def AddArAddArtistInfo(strArtist, strUrl):
pass
def AddSeriesInfo(SeriesInfo):
pass
def AddSeriesInfo(strSerires, strUrl):
pass
def AddTypeInfo(typeInfo):
pass
def AddTypeInfo(strType, strUrl):
pass
def getSheet(sheetName):
    """Return the worksheet called ``sheetName`` from the open workbook.

    The sheet is created when the workbook does not list it yet.

    Returns:
        The worksheet, or ``None`` when no workbook is open (``m_wb`` is
        ``None``).
    """
    # Identity comparison is the correct test for "no workbook".
    if m_wb is None:
        return None
    if sheetName in m_wb.sheetnames:
        return m_wb[sheetName]
    return m_wb.create_sheet(title=sheetName)
#
def GetXLSPath(path, fileName=None):
    """Return the full path of the workbook file inside ``path``.

    Args:
        path: directory that should contain the workbook. When it is empty,
            ``None`` or not an existing directory, the directory of this
            source file is used instead.
        fileName: workbook file name; defaults to the module-level
            ``xls_name``.

    Returns:
        The directory and file name joined with the platform separator.
    """
    if fileName is None:
        fileName = xls_name
    if path and os.path.isdir(path):
        baseDir = path
    else:
        # abspath(__file__) is the path of this source file itself; dirname()
        # is needed to get the folder that contains it.
        baseDir = os.path.dirname(os.path.abspath(__file__))
    # os.path.join inserts the separator that plain string concatenation
    # omits when path has no trailing slash.
    return os.path.join(baseDir, fileName)
# #

39
UtilPack.py Normal file
View File

@@ -0,0 +1,39 @@
import os
import time
# Not referenced by the code visible in this view.
m_dbgLevel = 0
# Every message passed to DbgOut is appended here; printDbgMessages replays it.
listDbgStr = []
#
def IsEmptyStr(string):
    """Return True when ``string`` is None, empty, or whitespace only.

    Args:
        string: the text to test, or ``None``.

    Returns:
        ``True`` for ``None``, ``""`` and strings made only of whitespace;
        ``False`` otherwise.
    """
    # None has no strip(); treat "no value" the same as an empty string
    # instead of raising AttributeError.
    if string is None:
        return True
    return not string.strip()
#
def GetCurrentTime():
    """Return the local time as ``year/month/day_hour:minute:second``.

    The fields are formatted as plain integers, without zero padding.
    """
    now = time.localtime()
    datePart = f"{now.tm_year}/{now.tm_mon}/{now.tm_mday}"
    clockPart = f"{now.tm_hour}:{now.tm_min}:{now.tm_sec}"
    return f"{datePart}_{clockPart}"
#for debug
def DbgOut(str):
    """Timestamp a debug message, record it in ``listDbgStr`` and print it.

    Args:
        str: the message text. The parameter name shadows the builtin inside
            this function; it is kept because callers may pass it by keyword.
    """
    stampedMsg = " : ".join((GetCurrentTime(), str))
    listDbgStr.append(stampedMsg)
    print(stampedMsg)
def printDbgMessages():
    """Print every message stored in ``listDbgStr``, one per line, in order."""
    # Guarded so that an empty log prints nothing at all (a bare print()
    # would emit a blank line).
    if listDbgStr:
        print(*listDbgStr, sep="\n")

View File

@@ -2,7 +2,7 @@ import GetArc_Hitomi as getHitomi
import GetArc_Ehentai as getEhentai
def main():
#getHitomi.GetSearchResult("trouble sweets")
getHitomi.GetSearchResult("trouble sweets")
getEhentai.GetSearchResult("artist%3A%22kotomi+yo-ji%24%22")
#artist:"kotomi yo-ji$"