From e9f8a7323c599dd53b1e923c590c37e7422645bf Mon Sep 17 00:00:00 2001 From: Lee Young Hoon Date: Mon, 13 May 2024 21:48:40 +0900 Subject: [PATCH] Update DataClass.py, GetArc_Ehentai.py, and 7 more files... MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 데이터 클래스 정의, 퍼필 다운로더 json 파서... --- DataClass.py | 90 +++++++++--------- GetArc_Ehentai.py | 12 ++- GetArc_Hitomi.py | 85 ++++++++++++----- MgrCalibreLibs.py | 98 +++++++++++++++++++ StoreXLS.py | 233 ++++++++++++++++++---------------------------- UtilPack.py | 177 ++++++++++++++++++++++++++++++++++- main.py | 11 ++- pupildata.py | 93 ++++++++++++++++++ rarzipcbz.py | 181 +++++++++++++++++++++++++++++++++++ 9 files changed, 762 insertions(+), 218 deletions(-) create mode 100644 MgrCalibreLibs.py create mode 100644 pupildata.py create mode 100644 rarzipcbz.py diff --git a/DataClass.py b/DataClass.py index 7ef4a59..3344f7c 100644 --- a/DataClass.py +++ b/DataClass.py @@ -1,48 +1,50 @@ -class MangaMetaInfo: - title = "" - url = "" - Series = "" - artists = [] - Tags = [] - fileurl = "" - galID = "" - - def AddArtist(self, strArtist): - self.artists.append(strArtist) - - def AddTag(self, strTag): - self.tags.append(strTag) - -class SeriesInfo: - strSeries = "" - strUrl = "" - - def __init__(self, series, url): - self.strSeries = series - self.strUrl = url - - -class TypeInfo: - strTyep = "" - strUrl = "" - - def __init__(self, strType, url): - self.strTyep = strType - self.strUrl = url +class CBZInfo: + def __init__(self, title, url): + self.title = title + self.url = url + self.serires = "" + self.type = "" + self.filename = "" + self.torrent = "" + self.language = "" + self.gallery_id = 0 + # 중복을 허용하지 않는 집합으로 초기화 + self.related_galID = set() + self.artists = set() + self.tags = set() + + def __str__(self): + return f"{self.title} by {self.author} ({self.publication_year})" + + def AddTag(self, name): + self.tags.add(name) + + def RmvTag(self, name): + self.tags.discard(name) + + def AddArtist(self, name): + self.artists.add(name) + + def RmvArtist(self, name): + self.artists.discard(name) + -class ArtistInfo: - strArtist = "" - strUrl = "" - def __init__(self, artist, url): - self.strArtist = artist - self.strUrl = url +class TagInfo: + def __init__(self, name, url): + self.name = name + self.url = url -class TagIngo: - strTag = "" - strUrl = "" + def __str__(self): + return f"{self.name} : {self.url}" - def __init__(self, tag, url): - self.strTag = tag - self.strUrl = url - \ No newline at end of file + +class ImageFileInfo: + def __init__(self, name, height, width, hashValue, bWebp): + self.name = name + self.height = height + self.width = width + self.hashValue = hashValue + self.bWebp = bWebp + + \ No newline at end of file diff --git a/GetArc_Ehentai.py b/GetArc_Ehentai.py index d5de960..2cfacce 100644 --- a/GetArc_Ehentai.py +++ b/GetArc_Ehentai.py @@ -6,10 +6,9 @@ from selenium.common.exceptions import TimeoutException from bs4 import BeautifulSoup import UtilPack as util +import DataClass as info -listResult = [] - # def GetSearchResult(searchWord): url = getSiteUrl(searchWord) @@ -35,6 +34,14 @@ def GetSearchResult(searchWord): parseMangaInfos(strContent) pass + +def GetGalInfoFromID(gal_id): + + #익헨은 https://koromo.xyz/api/search/ehash?id=번호 로 검색해서 토큰값 얻은 다음에 + #https://exhentai.org/g/번호/토큰 + #https://e-hentai.org/api/search/ehash?id=2890782 + + pass # def getSiteUrl(searchWord): @@ -56,7 +63,6 @@ def parseMangaInfos(html_doc): gl1cParse(gl) gl2cParse(gl) gl3cParse(gl) - print("\r\n") # type diff --git a/GetArc_Hitomi.py b/GetArc_Hitomi.py index 099af7b..e342048 100644 --- a/GetArc_Hitomi.py +++ b/GetArc_Hitomi.py @@ -6,9 +6,10 @@ from selenium.common.exceptions import TimeoutException from bs4 import BeautifulSoup import UtilPack as util +import DataClass as info -listResult = [] - + +# def GetSearchResult(searchWord): url = getSiteUrl(searchWord) @@ -39,70 +40,104 @@ def getSiteUrl(searchWord): strRet = "https://hitomi.la/" if False == util.IsEmptyStr(searchWord): - strRet = strRet + "search.html?" + searchWord + if False == searchWord.isdigit(): + strRet = strRet + "search.html?" + searchWord + else: + strRet = strRet + "galleries/" + searchWord + ".html" return strRet - # def parseMangaInfos(html_doc): # BeautifulSoup 객체 생성 soup = BeautifulSoup(html_doc, 'html.parser') gallery_elements = soup.find_all(class_='gallery-content') + listDJs = [] for element in gallery_elements: - djParse(element) + listDJ = djParse(element) + listDJs.extend(listDJ) + print(len(listDJs)) + def djParse(soup_element): childs = soup_element.find_all(class_='dj') + listInfos = [] for child in childs: - djtitleParse(child) - artistlistParse(child) - djDescParse(child) + info = djTitleParse(child) - print("\r\n") + listTag1 = djArtistParse(child, info) + listTag2 = djDescParse(child, info) + listInfos.append(info) -def djtitleParse(soup_element): - element = soup_element.find('h1', class_='lillie') + return listInfos + + +def djTitleParse(input_element): + element = input_element.find('h1', class_='lillie') title = element.text a_tag = element.find('a') - a_url = a_tag.get('href') - - print("title : " + title) - print("URl : " + a_url) + url = a_tag.get('href') -def artistlistParse(soup_element): - element = soup_element.find('div', class_='artist-list') + util.DbgOut("title : " + title) + util.DbgOut("URl : " + url) - print("artists") + return info.CBZInfo(title, url) + + +def djArtistParse(input_element, retPtr): + element = input_element.find('div', class_='artist-list') a_tags = element.find_all('a') + listArtists = [] for tag in a_tags: artist = tag.text a_url = tag.get('href') - print(" " + artist + " " + a_url) + retPtr.AddArtist(artist) + listArtists.append( info.TagInfo(artist, a_url) ) + return listArtists -def djDescParse(soup_element): - element = soup_element.find('table', class_='dj-desc') + +def djDescParse(input_element, retPtr): + element = input_element.find('table', class_='dj-desc') tb_rows = element.find_all('tr') + listTags = [] for row in tb_rows: tds = row.find_all('td') if 2 != len(tds): - print("td get failed") + util.DbgOut("Warning : td get failed") continue - print(tds[0].text + " : ") + + outMsg = f"{tds[0].text} : \r\n" a_tags = tds[1].find_all('a') for tag in a_tags: tag_name = tag.text tag_url = tag.get('href') - print(" " + tag_name + " " + tag_url) + + retPtr.AddTag(tag_name) + + listTags.append(info.TagInfo(tag_name, tag_url)) + + outMsg += f" {tag_name} {tag_url}\r\n" - pass + util.DbgOut(outMsg) + + # + if "Series" == tds[0]: + retPtr.serires = listTags[-1].name + elif "Type" == tds[0]: + retPtr.type = listTags[-1].name + elif "Language" == tds[0]: + retPtr.language = listTags[-1].name + else: + pass + + return listTags diff --git a/MgrCalibreLibs.py b/MgrCalibreLibs.py new file mode 100644 index 0000000..0ffebfd --- /dev/null +++ b/MgrCalibreLibs.py @@ -0,0 +1,98 @@ +import os +import rarfile +import zipfile +import shutil +import difflib +import UtilPack as util +import subprocess as ps + +from PIL import Image + +m_ImgExts = [".jpg",".png",".jpeg",".webp"] +m_CalLibPath = "/Volumes/NewDataStor/calibre_lib/" + +def Start(): + pathTrg = os.path.abspath(m_CalLibPath) + if False == os.path.exists(pathTrg): + util.DbgOut("Error : Invalid Path") + + listPaths = util.ListSubDirectories(pathTrg) + + print(f"Input : {len(listPaths)} \r\n") + + nIdx = 0; + listWorked = [] + listSkiped = [] + strCoverName = "cover.jpg" + for path in listPaths: + nIdx += 1 + # 조건은 커버파일이 있고 크기가 0 일 경우가 문제가 되는데... + pathCover = os.path.join(path, strCoverName) + if True == os.path.exists(pathCover) and 0 < os.path.getsize(pathCover): + continue + + # 조건에 맞는 폴더에서 cbz 파일을 찾아낸다. + nameCBZFile = "" + contents = os.listdir(path) + for item in contents: + extTmp = util.GetExtStr(item, False) + if extTmp.lower() == "cbz": + nameCBZFile = item + break + + # cbz 없으면 일단 넘어간다. + if nameCBZFile == "": + continue + + # # 압축을 풀고 + pathCBZFull = os.path.join(path, nameCBZFile) + util.ExtractZIP(pathCBZFull, path) + + # 하위 폴더를 훑어서 이미지 파일을 가져온다. 일단 webp 만... + listTrgFiles = util.ListFileExtRcr(path, "webp") + + if 0 >= len(listTrgFiles): + continue + + # CBZ 파일 삭제 + os.remove(pathCBZFull) + + # 다시 압축을 한다 + if False == util.CreateZIPShell(pathCBZFull, listTrgFiles): + print(f"error : {pathCBZFull}") + break + + # 크기가 0 인 커버 지운다. + if True == os.path.exists(pathCover): + os.remove(pathCover) + + # 원본 WebP 이미지 파일 경로 + webp_file = listTrgFiles[0] + # JPEG로 변환할 이미지를 엽니다 + try: + with Image.open(webp_file) as img: + # 새로운 파일명과 확장자를 지정하여 저장합니다 + img.save(pathCover, "JPEG") + except Exception as e: + listSkiped.append(pathCBZFull) + print(f"커버 변환중 에러 : {e}") + continue + + # webp 파일들 삭제 + for fileTrg in listTrgFiles: + if True == os.path.isfile(fileTrg): + os.remove(fileTrg) + + # 성공 로그! + print(f"Success : {nIdx} : {pathCBZFull}\r\n") + listWorked.append(pathCBZFull) + + print(f"complete : {len(listWorked)}") + print(f"Something Wrong : {listSkiped}") + + + + + + + \ No newline at end of file diff --git a/StoreXLS.py b/StoreXLS.py index c5156d5..05e8f89 100644 --- a/StoreXLS.py +++ b/StoreXLS.py @@ -5,159 +5,112 @@ from openpyxl.utils import xlsUtils import DataClass as stManga import UtilPack as util - -xls_name = "mangaDB.xlsx" -m_wb = None - -sheetName_Mangainfo = "MangaInfo" -sheetName_Artists = "Artists" -sheetName_Tags = "Tags" - -strMngSht = "MngInfo" -strArtSht = "ArtInfo" -strTagSht = "TagInfo" - -# -def DBXLSOpen(path): - xls_path = GetXLSPath(path) - util.DbgOut(xls_path) - - try: - wb = opyxl(xls_path) - util.DbgOut("xls Open Successed") - except FileNotFoundError: - wb = opyxl() - util.DbgOut("xls Created") - - if wb is None: - util.DbgOut("XLS Open Something Wrong...") - return - - m_wb = wb - - ws = wb.active - # time, title, url, tags (comma) - ws['A1'] = "Modified Time" - ws['B1'] = int(time.time()) - - - if 'list' not in wb.sheetnames: - ws1 = wb.create_sheet(title='list') - print('list sheet created') - - wb.save(xls_path) - - ws2 = wb['list'] - print(str(index) + " searched") - - -def DBXLSClose(): - if m_wb is None: - return - - m_wb.save(xls_path) - m_wb.close() - +class DBXLStorage: + xls_name = "mangaDB.xlsx" + xls_path = "" m_wb = None + sheetName_Mangainfo = "MangaInfo" + sheetName_Artists = "Artists" + sheetName_Tags = "Tags" -# -def WriteMangaInfos(*listInfos): - if False == isinstance(listInfos, list): - return + strMngSht = "MngInfo" + strArtSht = "ArtInfo" + strTagSht = "TagInfo" - ws_mng = getSheet(strMngSht) - if None == ws_mng: - return + m_openedXLS = "" - #for item in listInfos: - # 클래스 타잎을 확인해야 하지만만.. 생략. - # title, url, artist, group, series(parady), type, tags, hitomi ID, hitomi file, eh ID, eh tor + def __init__(self, path): + self.path = path -def AddTagInfo(tagInfo): - pass - -def AddTagInfo(strTag, strUrl): - pass - -def AddArtistInfo(artistInfo): - pass - -def AddArAddArtistInfo(strArtist, strUrl): - pass - -def AddSeriesInfo(SeriesInfo): - pass - -def AddSeriesInfo(strSerires, strUrl): - pass - -def AddTypeInfo(typeInfo): - pass - -def AddTypeInfo(strType, strUrl): - pass - + def __enter__(self): + self.DBXLSOpen(self.path) -def getSheet(sheetName): - if None == m_wb: - return + def __exit__(self, ex_type, ex_value, traceback): + self.DBXLSClose() + + def DBXLSOpen(self, path): + xls_path = self.GetXLSPath(path) + util.DbgOut(xls_path) + + try: + m_wb = opyxl(xls_path) + util.DbgOut("xls Open Successed") + except FileNotFoundError: + m_wb = opyxl() + util.DbgOut("xls Created") + + if m_wb is None: + util.DbgOut("XLS Open Something Wrong...") + m_openedXLS = "" + m_wb = None + return + + def DBXLSClose(self): + if self.m_wb is None or self.m_openedXLS is None: + util.DbgOut("XLS Close something wrong...") + return + + self.m_wb.save(self.m_openedXLS) + self.m_wb.close() + + self.m_wb = None + + # + def WriteMangaInfos(self, *listInfos): + if False == isinstance(listInfos, list): + return + + ws_mng = self.getSheet(self.strMngSht) + if None == ws_mng: + return + + #for item in listInfos: + # 클래스 타잎을 확인해야 하지만만.. 생략. + # ttist, group, series(parady), type, tags, hitomi ID, hitomi file, eh ID, eh tor + + def AddTagInfo(self, tagInfo): + pass - if sheetName in m_wb.sheetnames: - return m_wb[sheetName] + def AddTagInfo(self, strTag, strUrl): + pass - return m_wb.create_sheet(title=sheetName) - -# -def GetXLSPath(path): - retPath = path - if False == os.path.exists(path): - retPath = os.path.abspath(__file__) + def AddArtistInfo(self, artistInfo): + pass - return retPath + xls_name - - -# # -# def XLSWriteMangainfo(title, url, *tags): -# # -# try: -# wb = load_workbook(xls_path) -# print("Open Successed") -# except FileNotFoundError: -# wb = Workbook() -# print("xls Created") - -# ws = wb.active -# # time, title, url, tags (comma) -# ws['A1'] = "Modified Time" -# ws['B1'] = int(time.time()) + def AddArAddArtistInfo(self, strArtist, strUrl): + pass + def AddSeriesInfo(self, SeriesInfo): + pass -# if 'list' not in wb.sheetnames: -# ws1 = wb.create_sheet(title='list') -# print('list sheet created') + def AddSeriesInfo(self, strSerires, strUrl): + pass -# wb.save(xls_path) + def AddTypeInfo(self, typeInfo): + pass -# ws2 = wb['list'] - -# # 폴더 경로 -# folder_path = '/media/gerd/test/hiyobi_temp/' - -# # 폴더 내의 파일 및 폴더 목록 가져오기 -# items = os.listdir(folder_path) - -# index = 2 -# # 파일 및 폴더 목록 출력 -# for item in items: -# pos = 'A' + str(index) -# ws2[pos] = item -# index += 1 -# #print(item) - -# print(str(index) + " searched") -# wb.save(xls_path) - -# wb.close() + def AddTypeInfo(self, strType, strUrl): + pass + + # 시트를 가져온다. 엑셀 파일이 안 열려 있으면 None, 있으면 반환하고, 없으면 만들어서. + def getSheet(self, sheetName): + retSheet = None + + if self.m_wb: + if sheetName in self.m_wb.sheetnames: + retSheet = self.m_wb[sheetName] + else: + retSheet = self.m_wb.create_sheet(title=sheetName) + + return retSheet + + # 데이터베이스용 엑셀 파일의 전체 경로를 얻어온다. + def GetXLSPath(self, path): + retPath = path + if False == os.path.exists(path): + retPath = os.path.abspath(__file__) + + return os.path.join(retPath, self.xls_name) diff --git a/UtilPack.py b/UtilPack.py index cc98d4e..1d22def 100644 --- a/UtilPack.py +++ b/UtilPack.py @@ -1,5 +1,11 @@ import os import time +import rarfile +import zipfile +import shutil +import difflib +import subprocess + m_dbgLevel = 0 listDbgStr = [] @@ -26,14 +32,177 @@ def GetCurrentTime(): return strRet #for debug -def DbgOut(str): - strMsg = GetCurrentTime() +" : " + str - +def DbgOut(strInput, bPrint = False): + strMsg = (f"{GetCurrentTime()} : {strInput}") listDbgStr.append(strMsg) - print(strMsg) + + if True == bPrint: + print(strMsg) def printDbgMessages(): for line in listDbgStr: print(line) + + +# 입력된 경로의 자식 폴더를 찾아 반환한다. +# 반환하는 리스트는 리커시브 - 손자, 증손자 폴더까지 전부 포함한다 +def ListSubDirectories(root_dir): + subdirectories = [] + # root_dir에서 하위 디렉토리 및 파일 목록을 얻음 + for dirpath, dirnames, filenames in os.walk(root_dir): + # 하위 디렉토리 목록을 반복하며 하위 디렉토리만 추출 + for dirname in dirnames: + path = os.path.join(dirpath, dirname) + + if True == IsFinalFolder(path): + subdirectories.append(path) + + return subdirectories + + +def ListFileExtRcr(pathTrg, strExt): + listRet= [] + + # pathTrg의 하위 디렉토리 및 파일 목록을 얻음 + for dirpath, dirnames, filenames in os.walk(pathTrg): + for file in filenames: + extTmp = GetExtStr(file, False) + if extTmp.lower() == strExt and file.startswith('.'): + listRet.append(os.path.join(dirpath, file)) + + return listRet + +# 입력된 경로가 자식 폴더를 가지고 있는지 판단한다.- 최종 폴더인지 여부 +# 자식이 없으면 True, 자식이 있으면 False +def IsFinalFolder(path): + bRet = True + + contents = os.listdir(path) + for item in contents: + if True == os.path.isdir(item): + bRt = False + break + + return bRet; + +# 어떤 경로 안에서 특정 확장자의 파일을 뽑아내어 그 리스트를 반환한다. +def FindFileFromExt(path, ext): + bDot = False + if 0 <= ext.find('.'): + bDot = True + + listRet = [] + if False == os.path.exists(path): + return listRet + + contents = os.listdir(path) + for item in contents: + if True == os.path.isdir(item): + continue + + extItem = GetExtStr(item, bDot) + if extItem.lower() == ext.lower(): + listRet.append(item) + + return listRet + +# 파일 이름에서 확장자를 뽑아낸다. True : '.' 을 포함한다. +def GetExtStr(file_path, bDot = True): + retStr = "" + # 파일 경로에서 마지막 점을 찾아 확장자를 추출 + last_dot_index = file_path.rfind('.') + if last_dot_index == -1: + retStr = "" # 점이 없는 경우 확장자가 없음 + else: + if True == bDot: + retStr = file_path[last_dot_index:] + else: + retStr = file_path[last_dot_index+1:] + + return retStr + +# 문자열에 포함된 단어를 지운다. +def RmvSubString(mainString, subString): + # 문자열에서 부분 문자열의 인덱스를 찾습니다. + strIdx = mainString.find(subString) + if strIdx == -1: # 부분 문자열이 존재하지 않으면 그대로 반환합니다. + return mainString + + endIdx = strIdx + len(subString) + + # 부분 문자열을 제거하고 새로운 문자열을 반환합니다. + return mainString[:strIdx] + mainString[endIdx:] + +def ExtractZIP(zip_file, extract_to): + with zipfile.ZipFile(zip_file, 'r') as zf: + zf.extractall(extract_to) + +# +def CreateZIP(output_zip, *files): + with zipfile.ZipFile(output_zip, 'w') as zf: + for file in files: + zf.write(file, os.path.basename(file)) + + bRet = False + if os.path.exists(output_zip): + bRet = True + + return bRet + +# 파일 리스트에 들어있는 파일만 골라서 압축을 합니다. 상대경로를 제거하는게 기본값. +def CreateZIPShell(zipName, *files, bRmvRPath = True): + command = "zip " + + if True == bRmvRPath: + command += "-j " + + command += f"\"{zipName}\" " + + # 이중 리스트인 이유를 모르겠다. + for file in files: + strTemp = "" + if isinstance(file, list): + strTemp = ' '.join(file) + else: + strTemp = f"\"{file}\" " + + command += strTemp + + + # for item in file: + # command += f"\"{item}\" " + + result = subprocess.run(command, shell=True, capture_output=True, text=True) + + bRet = False + if 0 == result.returncode: + bRet = True + + return bRet + +# 특정 확장자만 쉘을 이용해서 압축한다 +def CreateZIPShExt(zipName, TrgExt): + command = f"zip -j {zipName} *.{TrgExt}" + + result = subprocess.run(command, shell=True, capture_output=True, text=True) + + bRet = False + if 0 == result.returncode: + bRet = True + + return bRet + +# JSON 을 트리 구조로 출력한다. +def PrintJSONTree(data, indent=0): + if isinstance(data, dict): + for key, value in data.items(): + print(' ' * indent + str(key)) + PrintJSONTree(value, indent + 1) + elif isinstance(data, list): + for item in data: + PrintJSONTree(item, indent) + else: + print(' ' * indent + str(data)) + \ No newline at end of file diff --git a/main.py b/main.py index e55c808..8b9ebaf 100644 --- a/main.py +++ b/main.py @@ -1,12 +1,19 @@ import GetArc_Hitomi as getHitomi import GetArc_Ehentai as getEhentai +import MgrCalibreLibs as mgrCal + +import UtilPack as util def main(): - getHitomi.GetSearchResult("trouble sweets") - getEhentai.GetSearchResult("artist%3A%22kotomi+yo-ji%24%22") + #getHitomi.GetSearchResult("2890685") + #etEhentai.GetSearchResult("artist%3A%22kotomi+yo-ji%24%22") + + mgrCal.Start() + #util.printDbgMessages() #artist:"kotomi yo-ji$" #"artist%3A%22kotomi+yo-ji%24%22" + # For Main Loop diff --git a/pupildata.py b/pupildata.py new file mode 100644 index 0000000..1764627 --- /dev/null +++ b/pupildata.py @@ -0,0 +1,93 @@ +import json + +import UtilPack as util +import DataClass as info + +GALBLOCK = "galleryBlock" +GALURL = "galleryUrl" +GALINFO = "galleryInfo" +GALTAGS = "relatedTags" +JTITLE = "japanese_title" + +# Example +#with open('test.db', 'r') as file: +# data = json.load(file) +#print_json_tree(data) +#print(data['galleryInfo']['tags']) + +# pupil : Json +# Caribre : text +# My : CSV +class PupuilInfoFile: + m_data = None + + def __init__(self, path): + self.path = path + + def __enter__(self): + self.DBXLSOpen(self.path) + + def __exit__(self, ex_type, ex_value, traceback): + self.DBXLSClose() + + def PupilJSONOpen(self, path): + with open(path, 'r') as file: + self.m_data = json.load(file) + + # pupil 의 JSON 을 파싱해서 DataClass 로 반환한다. + def GetInfo(self): + if None == self.m_data: + return None + + title = self.m_data[GALINFO]["title"] + url = self.m_data[GALBLOCK]["galleryUrl"] + + retInfo = info(title, url) + retInfo.type = self.m_data[GALINFO]["type"] + retInfo.language = self.m_data[GALINFO]["language"] + retInfo.gallery_id = self.m_data[GALINFO]["id"] + + listArtists = self.m_data[GALINFO]["artists"] + for item in listArtists: + strArtist = item["artist"] + strUrl = item["url"] + strTag = f"artist:{strArtist}" + + tempInfo = util.TagInfo(strTag, strUrl) + retInfo.AddArtist(tempInfo) + + listTags = self.m_data[GALINFO]["tags"] + for item in listTags: + strGend = "" + if 1 == item["female"]: + strGend = "female:" + elif 1 == item["male"]: + strGend = "male:" + + strTag = item["tag"] + strRelatedTag = f"{strGend}:{strTag}" + + tagUrl = item[url] + + tempInfo = util.TagInfo(strRelatedTag, tagUrl) + retInfo.AddTag(tempInfo) + + return retInfo + + # pupil 의 JSON 을 파싱해서 ImageFileList 를 반환한다. + def GetImageFilesInfo(self): + if None == self.m_data: + return None + + listRet = set() + listFiles = self.m_data[GALINFO]["files"] + for item in listFiles: + tempInfo = info.ImageFileInfo(item["name"], + item["height"], + item["width"], + item["hash"], + item["haswebp"]) + listRet.append(tempInfo) + + return listRet + diff --git a/rarzipcbz.py b/rarzipcbz.py new file mode 100644 index 0000000..6c5734e --- /dev/null +++ b/rarzipcbz.py @@ -0,0 +1,181 @@ +import os +import rarfile +import zipfile +import shutil +import difflib + +img_exts = [".jpg",".png",".jpeg",".webp"] +TrgBasePath = "/Volumes/NewDataStor/Backup/files/" +m_Debug = False + +def main(debug=False): + m_Debug = debug + + TrgBasePath = os.path.abspath("/Volumes/NewDataStor/Backup/files/") + + # 설정한 경로가 유효한가? + if False == os.path.exists(TrgBasePath): + print("Not Valid Path") + return + + # 압축을 죄다 푼다. + # -> 일단 이 과정은 생략 + + # 폴더만 죄다 골라내서.. + listTrgPaths = ListSubDirectories(TrgBasePath) + + dbgmsg(listTrgPaths) + for path in listTrgPaths: + #이미지 파일만 골라 리스트업. + contents = os.listdir(path) + listImgFiles = [] + + for item in contents: + if True == os.path.isdir(item): + continue + + ext = get_extension_str(item) + if ext in img_exts: + if os.path.exists(item): + listImgFiles.append(item) + else: + strTemp = os.path.join(path, item) + listImgFiles.append(strTemp) + + dbgmsg(listImgFiles) + + if 0 >= len(listImgFiles): + continue + + # 폴더 이름을 조합해서 파일 이름을 만든다. + strArcName = MakeCBZName(path) + ".cbz" + strArcPath = os.path.join(TrgBasePath, strArcName) + + # 이미 있으면 패스 + if os.path.exists(strArcName): + continue + else: + CreateZIP(strArcPath, *listImgFiles) + + + # 폴더를 압축한다. + # 폴더 안의 파일을 정리한다. .url, .txt, .db 뺸다 + # 일단 test.zip 로 압축하고, 원래 폴더 이름으로 변경 + # 만약 작가폴더 안에 작품이 들어있다면... + # 이걸 알 수는 없으니 그냥 앞에 대괄호 넣고 폴더 이름을 붙인다. + # 가능하면 하위 폴더 이름에 작가 이름이 들어있다면 그냥 생략하자. 가능하다면... + + +def MakeCBZName(path): + strSrcPath = os.path.abspath(path) + + if len(strSrcPath) < len(TrgBasePath) or TrgBasePath == strSrcPath: + return strSrcPath.replace("/", "-") + + strTemp = RmvSubString(strSrcPath, TrgBasePath) + listPar = strTemp.split(os.sep) + + strRet = listPar.pop(); + for item in reversed(listPar): + IdxTmp = strRet.find(item) + if IdxTmp == -1: + strRet = "[" + item + "]" + strRet + else: + continue + + return strRet + +# +def RmvSubString(main_string, substring): + # 문자열에서 부분 문자열의 인덱스를 찾습니다. + start_index = main_string.find(substring) + if start_index == -1: # 부분 문자열이 존재하지 않으면 그대로 반환합니다. + return main_string + end_index = start_index + len(substring) + + # 부분 문자열을 제거하고 새로운 문자열을 반환합니다. + return main_string[:start_index] + main_string[end_index:] + +# +def dbgmsg(string): + if True == m_Debug: + print(string) + +# +def ExtractRAR(file_path, extract_path): + with rarfile.RarFile(file_path, 'r') as rf: + rf.extractall(extract_path) + +# +def ExtractZIP(zip_file, extract_to): + with zipfile.ZipFile(zip_file, 'r') as zf: + zf.extractall(extract_to) + +# +def CreateZIP(output_zip, *files): + with zipfile.ZipFile(output_zip, 'w') as zf: + for file in files: + zf.write(file, os.path.basename(file)) + + bRet = False + if os.path.exists(output_zip): + bRet = True + + return bRet + +# +def ListSubDirectories(root_dir): + subdirectories = [] + + # root_dir에서 하위 디렉토리 및 파일 목록을 얻음 + for dirpath, dirnames, filenames in os.walk(root_dir): + # 하위 디렉토리 목록을 반복하며 하위 디렉토리만 추출 + for dirname in dirnames: + path = os.path.join(dirpath, dirname) + + if True == IsFinalFolder(path): + subdirectories.append(path) + + return subdirectories + +# +def IsFinalFolder(path): + bRet = True + + contents = os.listdir(path) + for item in contents: + if True == os.path.isdir(item): + bRt = False + break + + return bRet; + +# +def get_extension_str(file_path): + # 파일 경로에서 마지막 점을 찾아 확장자를 추출 + last_dot_index = file_path.rfind('.') + if last_dot_index == -1: + return "" # 점이 없는 경우 확장자가 없음 + else: + return file_path[last_dot_index:] + +# +def isRAR(file): + if True == os.path.isdir(file): + return False + + if ".rar" != get_extension_str(file): + return False + + return True + +# +def ReExt(path, srcExt, trgExt): + nCnt = -1 + + return nCnt + + +# For Main Loop +if __name__ == '__main__': + main(False)