Update DataClass.py, GetArc_Ehentai.py, and 7 more files...

데이터 클래스 정의, 퍼필 다운로더 json 파서...
2024-05-13 21:48:40 +09:00
parent cc02151899
commit e9f8a7323c
9 changed files with 762 additions and 218 deletions
--- a/DataClass.py
+++ b/DataClass.py
@@ -1,48 +1,50 @@
-class MangaMetaInfo:
+class CBZInfo:
-    title = ""
+    def __init__(self, title, url):
-    url = ""
+        self.title = title
-    Series = ""
+        self.url = url
-    artists = []
+        self.serires = ""
-    Tags = []
+        self.type = ""
-    fileurl = ""
+        self.filename = ""
-    galID = ""
+        self.torrent = ""
-    
+        self.language = ""
-    def AddArtist(self, strArtist):
+        self.gallery_id = 0
-        self.artists.append(strArtist)
+        # 중복을 허용하지 않는 집합으로 초기화
-        
+        self.related_galID = set()
-    def AddTag(self, strTag):
+        self.artists = set()
-        self.tags.append(strTag)        
+        self.tags = set()
-    
+
-class SeriesInfo:
+    def __str__(self):
-    strSeries = ""
+        return f"{self.title} by {self.author} ({self.publication_year})"
-    strUrl = ""
+
-    
+    def AddTag(self, name):
-    def __init__(self, series, url):
+        self.tags.add(name)
-        self.strSeries = series
+
-        self.strUrl = url
+    def RmvTag(self, name):
-    
+        self.tags.discard(name)
-    
+        
-class TypeInfo:
+    def AddArtist(self, name):
-    strTyep = ""
+        self.artists.add(name)
-    strUrl = ""
+
-    
+    def RmvArtist(self, name):
-    def __init__(self, strType, url):
+        self.artists.discard(name)
-        self.strTyep = strType
+        
        self.strUrl = url
 class ArtistInfo:
    strArtist = ""
    strUrl = ""
-    def __init__(self, artist, url):
+class TagInfo:
-        self.strArtist = artist
+    def __init__(self, name, url):
-        self.strUrl = url
+        self.name = name
        self.url = url
-class TagIngo:
+    def __str__(self):
-    strTag = ""
+        return f"{self.name} : {self.url}"
    strUrl = ""
-    def __init__(self, tag, url):
+
-        self.strTag = tag
+class ImageFileInfo:
-        self.strUrl = url
+    def __init__(self, name, height, width, hashValue, bWebp):
-        
+        self.name = name
        self.height = height
        self.width = width
        self.hashValue = hashValue
        self.bWebp = bWebp
--- a/GetArc_Ehentai.py
+++ b/GetArc_Ehentai.py
@@ -6,10 +6,9 @@ from selenium.common.exceptions import TimeoutException
 from bs4 import BeautifulSoup
 import UtilPack as util
 import DataClass as info
 listResult = []
 #
 def GetSearchResult(searchWord):
    url = getSiteUrl(searchWord)
@@ -35,6 +34,14 @@ def GetSearchResult(searchWord):
    parseMangaInfos(strContent)
    pass
 def GetGalInfoFromID(gal_id):
    """Placeholder for looking up a gallery by its numeric ID; not implemented yet."""
    # Planned approach for E-Hentai: query https://koromo.xyz/api/search/ehash?id=<number> to get the token value, then open
    #https://exhentai.org/g/<number>/<token>
    #https://e-hentai.org/api/search/ehash?id=2890782
    pass
 #
 def getSiteUrl(searchWord):
@@ -56,7 +63,6 @@ def parseMangaInfos(html_doc):
        gl1cParse(gl)
        gl2cParse(gl)
        gl3cParse(gl)
        print("\r\n")
 # type
--- a/GetArc_Hitomi.py
+++ b/GetArc_Hitomi.py
@@ -6,9 +6,10 @@ from selenium.common.exceptions import TimeoutException
 from bs4 import BeautifulSoup
 import UtilPack as util
 import DataClass as info
-listResult = []
+
-    
+#
 def GetSearchResult(searchWord):
    url = getSiteUrl(searchWord)
@@ -39,70 +40,104 @@ def getSiteUrl(searchWord):
    strRet = "https://hitomi.la/"
    if False == util.IsEmptyStr(searchWord):
-        strRet = strRet + "search.html?" + searchWord
+        if False == searchWord.isdigit():
            strRet = strRet + "search.html?" + searchWord
        else:
            strRet = strRet + "galleries/" + searchWord + ".html"
    return strRet
 #
 def parseMangaInfos(html_doc):
    # BeautifulSoup 객체 생성
    soup = BeautifulSoup(html_doc, 'html.parser')
    gallery_elements = soup.find_all(class_='gallery-content')
    listDJs = []
    for element in gallery_elements:
-        djParse(element)
+        listDJ = djParse(element)
        listDJs.extend(listDJ)
    print(len(listDJs))
 def djParse(soup_element):
    childs = soup_element.find_all(class_='dj')
    listInfos = []
    for child in childs:
-        djtitleParse(child)
+        info = djTitleParse(child)
        artistlistParse(child)
        djDescParse(child)
-        print("\r\n")
+        listTag1 = djArtistParse(child, info)
        listTag2 = djDescParse(child, info)
        listInfos.append(info)
-def djtitleParse(soup_element):
+    return listInfos
-    element = soup_element.find('h1', class_='lillie')
+
 def djTitleParse(input_element):
    element = input_element.find('h1', class_='lillie')
    title = element.text
    a_tag = element.find('a')
-    a_url = a_tag.get('href')
+    url = a_tag.get('href')
    print("title : " + title)
    print("URl : " + a_url)
-def artistlistParse(soup_element):
+    util.DbgOut("title : " + title)
-    element = soup_element.find('div', class_='artist-list')
+    util.DbgOut("URl : " + url)
-    print("artists")
+    return info.CBZInfo(title, url)
 def djArtistParse(input_element, retPtr):
    element = input_element.find('div', class_='artist-list')
    a_tags = element.find_all('a')
    listArtists = []
    for tag in a_tags:
        artist = tag.text
        a_url = tag.get('href')
-        print("    " + artist + " " + a_url)
+        retPtr.AddArtist(artist)
        listArtists.append( info.TagInfo(artist, a_url) )
    return listArtists
-def djDescParse(soup_element):
+
-    element = soup_element.find('table', class_='dj-desc')
+def djDescParse(input_element, retPtr):
    element = input_element.find('table', class_='dj-desc')
    tb_rows = element.find_all('tr')
    listTags = []
    for row in tb_rows:
        tds = row.find_all('td')
        if 2 != len(tds):
-            print("td get failed")
+            util.DbgOut("Warning : td get failed")
            continue
-        print(tds[0].text + " : ")
+        
        outMsg = f"{tds[0].text} : \r\n"
        a_tags = tds[1].find_all('a')
        for tag in a_tags:
            tag_name = tag.text
            tag_url = tag.get('href')
-            print("        " + tag_name + " " + tag_url)
+            
            retPtr.AddTag(tag_name)
            listTags.append(info.TagInfo(tag_name, tag_url))
            outMsg += f"        {tag_name} {tag_url}\r\n"
-    pass
+        util.DbgOut(outMsg)
        #
        if "Series" == tds[0]:
            retPtr.serires = listTags[-1].name
        elif "Type" == tds[0]:
            retPtr.type = listTags[-1].name
        elif "Language" == tds[0]:
            retPtr.language = listTags[-1].name
        else:
            pass
    return listTags
--- a/MgrCalibreLibs.py
+++ b/MgrCalibreLibs.py
@@ -0,0 +1,98 @@
 import os
 import rarfile
 import zipfile
 import shutil
 import difflib
 import UtilPack as util
 import subprocess as ps
 from PIL import Image
 m_ImgExts = [".jpg",".png",".jpeg",".webp"]
 m_CalLibPath = "/Volumes/NewDataStor/calibre_lib/"
 def Start():
    """Repair book folders under ``m_CalLibPath`` whose cover image is missing or empty.
    For every folder returned by ``util.ListSubDirectories`` that has no non-empty
    ``cover.jpg``, the first ``.cbz`` found in it is extracted in place, rebuilt from the
    extracted ``.webp`` files only, and the first ``.webp`` is saved as ``cover.jpg``.
    Progress and failures are printed; nothing is returned.
    """
    pathTrg = os.path.abspath(m_CalLibPath)
    if False == os.path.exists(pathTrg):
        util.DbgOut("Error : Invalid Path")
        # A missing library root leaves nothing to scan, so stop instead of falling through.
        return
    listPaths = util.ListSubDirectories(pathTrg)
    print(f"Input : {len(listPaths)} \r\n")
    listWorked = []
    listSkiped = []
    strCoverName = "cover.jpg"
    # nIdx is the 1-based position of the folder; it is only used in the success log.
    for nIdx, path in enumerate(listPaths, start=1):
        # Only folders whose cover file is missing or zero bytes long need work.
        pathCover = os.path.join(path, strCoverName)
        if True == os.path.exists(pathCover) and 0 < os.path.getsize(pathCover):
            continue
        # Find the first CBZ file in the folder.
        nameCBZFile = ""
        for item in os.listdir(path):
            extTmp = util.GetExtStr(item, False)
            if extTmp.lower() == "cbz":
                nameCBZFile = item
                break
        # No CBZ here: skip the folder for now.
        if nameCBZFile == "":
            continue
        # Extract the archive into the folder itself.
        pathCBZFull = os.path.join(path, nameCBZFile)
        util.ExtractZIP(pathCBZFull, path)
        # Collect the image files below the folder; webp only for now.
        listTrgFiles = util.ListFileExtRcr(path, "webp")
        if 0 >= len(listTrgFiles):
            continue
        # Build the replacement archive under a temporary name and swap it in only
        # after zip succeeded, so a failed run never destroys the original CBZ.
        pathNewCBZ = pathCBZFull + ".new"
        if True == os.path.exists(pathNewCBZ):
            # A stale leftover would be updated by zip instead of recreated.
            os.remove(pathNewCBZ)
        if False == util.CreateZIPShell(pathNewCBZ, listTrgFiles):
            print(f"error : {pathCBZFull}")
            if True == os.path.exists(pathNewCBZ):
                os.remove(pathNewCBZ)
            break
        os.replace(pathNewCBZ, pathCBZFull)
        # Remove the zero-size cover before writing the new one.
        if True == os.path.exists(pathCover):
            os.remove(pathCover)
        # The first webp file becomes the cover source.
        webp_file = listTrgFiles[0]
        # Convert it to JPEG under the cover file name.
        try:
            with Image.open(webp_file) as img:
                img.save(pathCover, "JPEG")
        except Exception as e:
            # Best effort: remember the archive and move on to the next folder.
            listSkiped.append(pathCBZFull)
            print(f"커버 변환중 에러 : {e}")
            continue
        # Delete the extracted webp files.
        for fileTrg in listTrgFiles:
            if True == os.path.isfile(fileTrg):
                os.remove(fileTrg)
        # Success log.
        print(f"Success : {nIdx} : {pathCBZFull}\r\n")
        listWorked.append(pathCBZFull)
    print(f"complete : {len(listWorked)}")
    print(f"Something Wrong : {listSkiped}")
--- a/StoreXLS.py
+++ b/StoreXLS.py
@@ -5,159 +5,112 @@ from openpyxl.utils import xlsUtils
 import DataClass as stManga
 import UtilPack as util
-
+class DBXLStorage:
-xls_name = "mangaDB.xlsx"
+    xls_name = "mangaDB.xlsx"
-m_wb = None
+    xls_path = ""
 sheetName_Mangainfo = "MangaInfo" 
 sheetName_Artists = "Artists"
 sheetName_Tags = "Tags"
 strMngSht = "MngInfo"
 strArtSht = "ArtInfo"
 strTagSht = "TagInfo"
 #
 def DBXLSOpen(path):
    xls_path = GetXLSPath(path)
    util.DbgOut(xls_path)
    try:
        wb = opyxl(xls_path)
        util.DbgOut("xls Open Successed")
    except FileNotFoundError:
        wb = opyxl()
        util.DbgOut("xls Created")
    if wb is None:
        util.DbgOut("XLS Open Something Wrong...")
        return
    m_wb = wb
    ws = wb.active
    # time, title, url, tags (comma)
    ws['A1'] = "Modified Time"
    ws['B1'] = int(time.time())
    if 'list' not in wb.sheetnames:
        ws1 = wb.create_sheet(title='list')
        print('list sheet created')
    wb.save(xls_path)
    ws2 = wb['list']
    print(str(index) + " searched")
 def DBXLSClose():
    if m_wb is None:
        return
    m_wb.save(xls_path)
    m_wb.close()
    m_wb = None
    sheetName_Mangainfo = "MangaInfo" 
    sheetName_Artists = "Artists"
    sheetName_Tags = "Tags"
-#
+    strMngSht = "MngInfo"
-def WriteMangaInfos(*listInfos):
+    strArtSht = "ArtInfo"
-    if False == isinstance(listInfos, list):    
+    strTagSht = "TagInfo"
        return
-    ws_mng = getSheet(strMngSht)
+    m_openedXLS = ""
    if None == ws_mng:
        return
-    #for item in listInfos:
+    def __init__(self, path):
-        # 클래스 타잎을 확인해야 하지만만.. 생략.
+        self.path = path
        # title, url, artist, group, series(parady), type, tags, hitomi ID, hitomi file, eh ID, eh tor
-def AddTagInfo(tagInfo):
+    def __enter__(self):
-    pass
+        self.DBXLSOpen(self.path)
 def AddTagInfo(strTag, strUrl):
    pass
 def AddArtistInfo(artistInfo):
    pass
 def AddArAddArtistInfo(strArtist, strUrl):
    pass
 def AddSeriesInfo(SeriesInfo):
    pass
 def AddSeriesInfo(strSerires, strUrl):
    pass
 def AddTypeInfo(typeInfo):
    pass
 def AddTypeInfo(strType, strUrl):
    pass
-def getSheet(sheetName):
+    def __exit__(self, ex_type, ex_value, traceback):
-    if None == m_wb:
+        self.DBXLSClose()
-        return
+        
    def DBXLSOpen(self, path):
        xls_path = self.GetXLSPath(path)
        util.DbgOut(xls_path)
        try:
            m_wb = opyxl(xls_path)
            util.DbgOut("xls Open Successed")
        except FileNotFoundError:
            m_wb = opyxl()
            util.DbgOut("xls Created")
        if m_wb is None:
            util.DbgOut("XLS Open Something Wrong...")
            m_openedXLS = ""
            m_wb = None
            return
    def DBXLSClose(self):
        if self.m_wb is None or self.m_openedXLS is None:
            util.DbgOut("XLS Close something wrong...")
            return
        self.m_wb.save(self.m_openedXLS)
        self.m_wb.close()
        self.m_wb = None
    #
    def WriteMangaInfos(self, *listInfos):
        if False == isinstance(listInfos, list):    
            return
        ws_mng = self.getSheet(self.strMngSht)
        if None == ws_mng:
            return
        #for item in listInfos:
            # 클래스 타잎을 확인해야 하지만만.. 생략.
            # ttist, group, series(parady), type, tags, hitomi ID, hitomi file, eh ID, eh tor
    def AddTagInfo(self, tagInfo):
        pass
-    if sheetName in m_wb.sheetnames:
+    def AddTagInfo(self, strTag, strUrl):
-        return m_wb[sheetName]
+        pass
-    return m_wb.create_sheet(title=sheetName)
+    def AddArtistInfo(self, artistInfo):
-
+        pass
 #
 def GetXLSPath(path):
    retPath = path
    if False == os.path.exists(path):
        retPath = os.path.abspath(__file__)
-    return retPath + xls_name
+    def AddArAddArtistInfo(self, strArtist, strUrl):
-
+        pass
 # #
 # def XLSWriteMangainfo(title, url, *tags):
 #     #    
 #     try:
 #         wb = load_workbook(xls_path)
 #         print("Open Successed")
 #     except FileNotFoundError:
 #         wb = Workbook()
 #         print("xls Created")
 #     ws = wb.active
 #     # time, title, url, tags (comma)
 #     ws['A1'] = "Modified Time"
 #     ws['B1'] = int(time.time())
    def AddSeriesInfo(self, SeriesInfo):
        pass
-#     if 'list' not in wb.sheetnames:
+    def AddSeriesInfo(self, strSerires, strUrl):
-#         ws1 = wb.create_sheet(title='list')
+        pass
 #         print('list sheet created')
-#     wb.save(xls_path)
+    def AddTypeInfo(self, typeInfo):
        pass
-#     ws2 = wb['list']
+    def AddTypeInfo(self, strType, strUrl):
-
+        pass
-# # 폴더 경로
+    
-# folder_path = '/media/gerd/test/hiyobi_temp/'
+    # 시트를 가져온다. 엑셀 파일이 안 열려 있으면 None, 있으면 반환하고, 없으면 만들어서.
-
+    def getSheet(self, sheetName):
-# # 폴더 내의 파일 및 폴더 목록 가져오기
+        retSheet = None
-# items = os.listdir(folder_path)
+        
-
+        if self.m_wb:
-# index = 2
+            if sheetName in self.m_wb.sheetnames:
-# # 파일 및 폴더 목록 출력
+                retSheet = self.m_wb[sheetName]
-# for item in items:
+            else:
-#     pos = 'A' + str(index)
+                retSheet = self.m_wb.create_sheet(title=sheetName)
-#     ws2[pos] = item
+        
-#     index += 1
+        return retSheet
-#     #print(item)
+    
-
+    # 데이터베이스용 엑셀 파일의 전체 경로를 얻어온다.
-# print(str(index) + " searched")
+    def GetXLSPath(self, path):
-# wb.save(xls_path)
+        retPath = path
-
+        if False == os.path.exists(path):
-# wb.close()
+            retPath = os.path.abspath(__file__)
        return os.path.join(retPath, self.xls_name) 
--- a/UtilPack.py
+++ b/UtilPack.py
@@ -1,5 +1,11 @@
 import os
 import time
 import rarfile
 import zipfile
 import shutil
 import difflib
 import subprocess
 m_dbgLevel = 0
 listDbgStr = []
@@ -26,14 +32,177 @@ def GetCurrentTime():
    return strRet 
 #for debug
-def DbgOut(str):
+def DbgOut(strInput, bPrint = False):
-    strMsg = GetCurrentTime() +" : " + str
+    strMsg = (f"{GetCurrentTime()} : {strInput}")
    listDbgStr.append(strMsg)
-    print(strMsg)
+    
    if True == bPrint:
        print(strMsg)
 def printDbgMessages():
    """Print every message collected in ``listDbgStr``, one per line, in insertion order."""
    if listDbgStr:
        print("\n".join(listDbgStr))
 # Find the child folders of the given path and return them.
 # The walk is recursive: grandchildren, great-grandchildren and deeper folders are visited too.
 def ListSubDirectories(root_dir):
    """Return the joined path of every directory below ``root_dir`` for which ``IsFinalFolder`` is True."""
    return [
        candidate
        for parent, child_names, _file_names in os.walk(root_dir)
        for candidate in (os.path.join(parent, child) for child in child_names)
        if IsFinalFolder(candidate) == True
    ]
 def ListFileExtRcr(pathTrg, strExt):
    """Recursively list the files below ``pathTrg`` whose extension is ``strExt``.
    ``strExt`` is given without the leading dot (for example ``"webp"``) and is matched
    case-insensitively. Hidden files (names starting with ``.``) are skipped.
    Returns a list of joined paths in ``os.walk`` order.
    """
    wanted = strExt.lower()
    listRet = []
    for dirpath, _dirnames, filenames in os.walk(pathTrg):
        for file in filenames:
            # The original test was inverted and kept ONLY dot-files; hidden entries are the ones to drop.
            if file.startswith('.'):
                continue
            # splitext keeps the leading dot, so strip it before comparing.
            if os.path.splitext(file)[1][1:].lower() == wanted:
                listRet.append(os.path.join(dirpath, file))
    return listRet
 # Decide whether the given path has child folders, i.e. whether it is a final (leaf) folder.
 # True when it has no child folder, False when it has at least one.
 def IsFinalFolder(path):
    """Return True when ``path`` contains no sub-directory, False otherwise."""
    # Entries from os.listdir are bare names, so they must be joined with ``path``
    # before testing; otherwise they are resolved against the current directory.
    return not any(
        os.path.isdir(os.path.join(path, item)) for item in os.listdir(path)
    )
 # Pick the files with a given extension out of a folder and return their names as a list.
 def FindFileFromExt(path, ext):
    """Return the names of the entries directly inside ``path`` whose extension equals ``ext``.
    ``ext`` may be given with or without a dot; the comparison is case-insensitive.
    Sub-directories are skipped. A non-existent ``path`` yields an empty list.
    The returned items are bare names, not joined paths.
    """
    # Compare with the dot included only when the caller's extension contains one.
    bDot = 0 <= ext.find('.')
    listRet = []
    if False == os.path.exists(path):
        return listRet
    wanted = ext.lower()
    for item in os.listdir(path):
        # listdir yields bare names; join with ``path`` so the directory test
        # does not depend on the current working directory.
        if os.path.isdir(os.path.join(path, item)):
            continue
        if GetExtStr(item, bDot).lower() == wanted:
            listRet.append(item)
    return listRet
 # Extract the extension from a file name. bDot True: the '.' is included.
 def GetExtStr(file_path, bDot = True):
    """Return the text from the last '.' of ``file_path`` onward, or "" when there is no '.'.
    When ``bDot`` equals True the dot is part of the result; otherwise it is left out.
    """
    dot_at = file_path.rfind('.')
    if dot_at == -1:
        # No dot at all means no extension.
        return ""
    start = dot_at if bDot == True else dot_at + 1
    return file_path[start:]
 # Remove a word contained in a string.
 def RmvSubString(mainString, subString):
    """Return ``mainString`` with the first occurrence of ``subString`` removed.
    The string is returned unchanged when ``subString`` does not occur in it.
    """
    # A count of 1 limits the replacement to the first match only.
    return mainString.replace(subString, "", 1)
 def ExtractZIP(zip_file, extract_to):
    """Extract every member of the archive ``zip_file`` into the directory ``extract_to``."""
    archive = zipfile.ZipFile(zip_file, 'r')
    try:
        archive.extractall(extract_to)
    finally:
        # Always release the file handle, also when extraction raises.
        archive.close()
 #
 def CreateZIP(output_zip, *files):
    """Write ``files`` into a new archive ``output_zip``, each stored under its base name.
    Returns True when ``output_zip`` exists afterwards, False otherwise.
    """
    with zipfile.ZipFile(output_zip, 'w') as archive:
        for source in files:
            # Store only the file name so the archive has no directory structure.
            archive.write(source, os.path.basename(source))
    return os.path.exists(output_zip)
 # Compress only the files named in the file list. Dropping the relative paths is the default.
 def CreateZIPShell(zipName, *files, bRmvRPath = True):
    """Create ``zipName`` with the external ``zip`` command from the given files.
    Each positional argument may be a single path or a list/tuple of paths; a caller
    that passes one list instead of unpacking it is therefore handled as well.
    ``bRmvRPath`` True adds ``-j`` so directory parts are not stored.
    Returns True when ``zip`` exits with status 0, False otherwise (also when the
    ``zip`` program cannot be started).
    """
    argv = ["zip"]
    if True == bRmvRPath:
        argv.append("-j")
    argv.append(str(zipName))
    for file in files:
        if isinstance(file, (list, tuple)):
            argv.extend(str(item) for item in file)
        else:
            argv.append(str(file))
    # An argument vector without a shell keeps paths containing spaces, quotes or
    # shell metacharacters intact; the old string-built command broke on them.
    try:
        result = subprocess.run(argv, capture_output=True, text=True)
    except OSError:
        # zip is not installed or not executable.
        return False
    return 0 == result.returncode
 # Compress only the files with a given extension, using the external zip command.
 def CreateZIPShExt(zipName, TrgExt):
    """Zip every ``*.<TrgExt>`` file of the current working directory into ``zipName``.
    Files are stored without directory parts (``zip -j``).
    Returns True when ``zip`` exits with status 0; False when nothing matches, when
    ``zip`` fails, or when the ``zip`` program cannot be started.
    """
    import glob
    # Expand the pattern here instead of in a shell so that the archive name and the
    # matched names reach zip unmodified, spaces and metacharacters included.
    matches = sorted(glob.glob(f"*.{TrgExt}"))
    if not matches:
        return False
    try:
        result = subprocess.run(["zip", "-j", str(zipName), *matches],
                                capture_output=True, text=True)
    except OSError:
        return False
    return 0 == result.returncode
 # Print JSON data as a tree.
 def PrintJSONTree(data, indent=0):
    """Print ``data`` as an indented tree, two spaces per level.
    Dict keys are printed at the current level and their values one level deeper;
    list items are printed at the current level; anything else is printed via ``str``.
    """
    pad = '  ' * indent
    if isinstance(data, dict):
        for key in data:
            print(pad + str(key))
            PrintJSONTree(data[key], indent + 1)
        return
    if isinstance(data, list):
        # Lists add no level of their own.
        for element in data:
            PrintJSONTree(element, indent)
        return
    print(pad + str(data))
--- a/main.py
+++ b/main.py
@@ -1,12 +1,19 @@
 import GetArc_Hitomi as getHitomi
 import GetArc_Ehentai as getEhentai
 import MgrCalibreLibs as mgrCal
 import UtilPack as util
 def main():
-    getHitomi.GetSearchResult("trouble sweets")
+    #getHitomi.GetSearchResult("2890685")
-    getEhentai.GetSearchResult("artist%3A%22kotomi+yo-ji%24%22")
+    #etEhentai.GetSearchResult("artist%3A%22kotomi+yo-ji%24%22")
    mgrCal.Start()
    #util.printDbgMessages()
    #artist:"kotomi yo-ji$"
    #"artist%3A%22kotomi+yo-ji%24%22"
 # For Main Loop
--- a/pupildata.py
+++ b/pupildata.py
@@ -0,0 +1,93 @@
 import json
 import UtilPack as util
 import DataClass as info
 GALBLOCK = "galleryBlock"
 GALURL = "galleryUrl"
 GALINFO = "galleryInfo"
 GALTAGS = "relatedTags"
 JTITLE = "japanese_title"
 # Example
 #with open('test.db', 'r') as file:
 #    data = json.load(file)
 #print_json_tree(data)
 #print(data['galleryInfo']['tags'])
 # pupil : Json
 # Calibre : text
 # My : CSV
 class PupuilInfoFile:
    """Load a Pupil gallery JSON file and convert it into DataClass objects.
    Usable as a context manager: the file at ``path`` is parsed on entry and the
    parsed data is dropped on exit.
    """
    # Parsed JSON document; stays None until PupilJSONOpen() has run.
    m_data = None
    def __init__(self, path):
        """Remember the JSON file path; nothing is read yet."""
        self.path = path
    def __enter__(self):
        """Parse the file at ``self.path`` and return this reader as the with-target."""
        self.PupilJSONOpen(self.path)
        return self
    def __exit__(self, ex_type, ex_value, traceback):
        """Drop the parsed data. Exceptions raised in the with-body are not suppressed."""
        self.m_data = None
    def PupilJSONOpen(self, path):
        """Parse the JSON file at ``path`` into ``self.m_data``."""
        # NOTE(review): assumes the file is UTF-8 encoded JSON - confirm against real Pupil output.
        with open(path, 'r', encoding='utf-8') as file:
            self.m_data = json.load(file)
    # Parse the Pupil JSON and return it as a DataClass object.
    def GetInfo(self):
        """Build an ``info.CBZInfo`` from the loaded JSON, or return None when nothing is loaded.
        Artists and tags are added to the result as ``info.TagInfo`` objects.
        """
        if self.m_data is None:
            return None
        galInfo = self.m_data[GALINFO]
        title = galInfo["title"]
        url = self.m_data[GALBLOCK][GALURL]
        retInfo = info.CBZInfo(title, url)
        retInfo.type = galInfo["type"]
        retInfo.language = galInfo["language"]
        retInfo.gallery_id = galInfo["id"]
        # NOTE(review): a missing or null list is treated as empty - confirm the schema.
        for item in galInfo.get("artists") or []:
            strTag = f"artist:{item['artist']}"
            retInfo.AddArtist(info.TagInfo(strTag, item["url"]))
        for item in galInfo.get("tags") or []:
            # NOTE(review): the flags are accepted as 1 or "1"; verify which form the file uses.
            if "1" == str(item.get("female", "")):
                strGend = "female:"
            elif "1" == str(item.get("male", "")):
                strGend = "male:"
            else:
                strGend = ""
            # The prefix already carries its colon, so no second one is inserted.
            strRelatedTag = f"{strGend}{item['tag']}"
            # The tag's own URL lives under the "url" key (the gallery URL is not a key).
            retInfo.AddTag(info.TagInfo(strRelatedTag, item["url"]))
        return retInfo
    # Parse the Pupil JSON and return the image file list.
    def GetImageFilesInfo(self):
        """Return a list of ``info.ImageFileInfo`` built from the "files" entries, or None when nothing is loaded."""
        if self.m_data is None:
            return None
        # A list keeps the page order of the file and supports append().
        listRet = []
        for item in self.m_data[GALINFO]["files"]:
            listRet.append(info.ImageFileInfo(item["name"],
                                              item["height"],
                                              item["width"],
                                              item["hash"],
                                              item["haswebp"]))
        return listRet
--- a/rarzipcbz.py
+++ b/rarzipcbz.py
@@ -0,0 +1,181 @@
 import os
 import rarfile
 import zipfile
 import shutil
 import difflib
 img_exts = [".jpg",".png",".jpeg",".webp"]
 TrgBasePath = "/Volumes/NewDataStor/Backup/files/"
 m_Debug = False
 def main(debug=False):
    """Pack the image files of every leaf folder below ``TrgBasePath`` into a ``.cbz`` archive.
    The archive name comes from ``MakeCBZName`` and the archive is written into the base
    folder; folders without images and archives that already exist are skipped.
    ``debug`` switches the ``dbgmsg`` output on for the whole module.
    """
    # Without the global declaration the flag read by dbgmsg() would never change.
    global m_Debug
    m_Debug = debug
    basePath = os.path.abspath(TrgBasePath)
    # Is the configured path valid?
    if False == os.path.exists(basePath):
        print("Not Valid Path")
        return
    # Extracting existing archives first is planned but skipped for now.
    # Collect the folders to process.
    listTrgPaths = ListSubDirectories(basePath)
    dbgmsg(listTrgPaths)
    for path in listTrgPaths:
        # List only the image files of this folder (sorted for a stable page order).
        listImgFiles = []
        for item in sorted(os.listdir(path)):
            # listdir yields bare names; every file-system test needs the joined path.
            fullPath = os.path.join(path, item)
            if os.path.isdir(fullPath):
                continue
            if get_extension_str(item).lower() in img_exts:
                listImgFiles.append(fullPath)
        dbgmsg(listImgFiles)
        if 0 >= len(listImgFiles):
            continue
        # Build the archive name from the folder names.
        strArcName = MakeCBZName(path) + ".cbz"
        strArcPath = os.path.join(basePath, strArcName)
        # Skip when the archive already exists at its destination path.
        if os.path.exists(strArcPath):
            continue
        CreateZIP(strArcPath, *listImgFiles)
    # Planned follow-ups:
    # Compress the folder.
    # Tidy the files inside the folder: leave out .url, .txt and .db files.
    # Compress to test.zip first, then rename to the original folder name.
    # If works are stored inside an artist folder...
    # that cannot be detected, so just prepend the folder name in square brackets.
    # If possible, omit the artist name when the sub-folder name already contains it.
 def MakeCBZName(path):
    """Derive an archive base name from ``path`` relative to ``TrgBasePath``.
    The last path component is the name; each ancestor component not already
    contained in the name is prepended as ``[ancestor]``, nearest ancestor first.
    Paths shorter than, or equal to, ``TrgBasePath`` return the absolute path with
    "/" replaced by "-".
    """
    strSrcPath = os.path.abspath(path)
    if TrgBasePath == strSrcPath or len(strSrcPath) < len(TrgBasePath):
        return strSrcPath.replace("/", "-")
    parts = RmvSubString(strSrcPath, TrgBasePath).split(os.sep)
    name = parts.pop()
    # Consume the remaining components from the innermost ancestor outwards.
    while parts:
        ancestor = parts.pop()
        if ancestor not in name:
            name = "[" + ancestor + "]" + name
    return name
 #
 def RmvSubString(main_string, substring):
    """Return ``main_string`` without the first occurrence of ``substring``.
    When ``substring`` is not found the string comes back unchanged.
    """
    # The count argument restricts the removal to the first match.
    return main_string.replace(substring, "", 1)
 #
 def dbgmsg(string):
    """Print ``string`` only while the module-level ``m_Debug`` flag equals True."""
    if m_Debug != True:
        return
    print(string)
 #
 def ExtractRAR(file_path, extract_path):
    """Extract every member of the RAR archive ``file_path`` into ``extract_path``."""
    archive = rarfile.RarFile(file_path, 'r')
    try:
        archive.extractall(extract_path)
    finally:
        # Close the archive even when extraction raises.
        archive.close()
 #
 def ExtractZIP(zip_file, extract_to):
    """Extract every member of the ZIP archive ``zip_file`` into ``extract_to``."""
    source = zipfile.ZipFile(zip_file, 'r')
    try:
        source.extractall(extract_to)
    finally:
        # Close the archive even when extraction raises.
        source.close()
 #
 def CreateZIP(output_zip, *files):
    """Create the archive ``output_zip`` from ``files``, storing each under its base name.
    Returns True when ``output_zip`` exists afterwards, False otherwise.
    """
    with zipfile.ZipFile(output_zip, 'w') as target:
        for member in files:
            # Only the file name is stored, so the archive stays flat.
            target.write(member, os.path.basename(member))
    return os.path.exists(output_zip)
 #
 def ListSubDirectories(root_dir):
    """Return the joined path of every directory below ``root_dir`` for which ``IsFinalFolder`` is True.
    The walk is recursive, so directories at any depth are considered.
    """
    return [
        candidate
        for parent, child_names, _file_names in os.walk(root_dir)
        for candidate in (os.path.join(parent, child) for child in child_names)
        if IsFinalFolder(candidate) == True
    ]
 #
 def IsFinalFolder(path):
    """Return True when ``path`` contains no sub-directory, False otherwise."""
    # os.listdir yields bare names; join them with ``path`` so the directory test
    # does not depend on the current working directory. The previous version also
    # assigned its result to a misspelled variable and therefore always returned True.
    return not any(
        os.path.isdir(os.path.join(path, item)) for item in os.listdir(path)
    )
 #
 def get_extension_str(file_path):
    """Return the extension of ``file_path`` including its leading dot, or "" when there is no dot."""
    dot_at = file_path.rfind('.')
    # rfind reports -1 when the path holds no dot, i.e. no extension.
    return file_path[dot_at:] if dot_at != -1 else ""
 #
 def isRAR(file):
    """Return True when ``file`` is not a directory and its extension is exactly ".rar"."""
    if os.path.isdir(file) == True:
        return False
    return get_extension_str(file) == ".rar"
 #
 def ReExt(path, srcExt, trgExt):
    """Placeholder: currently ignores all arguments and always returns -1."""
    return -1
 # Script entry point: run the conversion with debug output disabled.
 if __name__ == '__main__':
    main(False)