"""Classify photos by OCR'd serial number and read the number under a label.

Two independent tools live in this script:

* ``classify_and_extract`` walks ``SOURCE_DIR`` and copies every image into
  either ``SERIAL_FOLDER`` or ``NON_SERIAL_FOLDER`` depending on whether the
  OCR text contains a match for ``SERIAL_PATTERN``.
* ``find_number_below_security_light_tesseract`` takes word-level OCR data and
  returns the digit-only word located just below the word containing "보안등".
"""
import os
import re
import shutil

import cv2
import numpy as np
import pytesseract
from PIL import Image
from pytesseract import Output

# --- User settings ---
# Source image folder paths
TOP_SOURCE_DIR = '/Volumes/ExSSD/Working/용공추 사진/'
SOURCE_DIR = '/Volumes/ExSSD/Working/용공추 사진/Raw_Data_4'

# Output folders
SERIAL_FOLDER = os.path.join(TOP_SOURCE_DIR, '일련번호_사진')
NON_SERIAL_FOLDER = os.path.join(TOP_SOURCE_DIR, '일반_사진')

# Serial-number regular expression (default: a standalone 5-6 digit number).
# The previous value r' ' matched a single space, so nearly every OCR result
# was treated as containing a serial number.
SERIAL_PATTERN = r'\b\d{5,6}\b'


def find_number_below_security_light_tesseract(data):
    """Return the digit-only word closest below the first "보안등" word.

    Args:
        data: Mapping with parallel lists under the keys ``'text'``, ``'top'``
            and ``'height'`` (the ``__main__`` driver passes the result of
            ``pytesseract.image_to_data(..., output_type=Output.DICT)``).

    Returns:
        The text of the digit-only word with the smallest ``top`` value that
        lies more than 5 units below the bottom edge of the first word
        containing "보안등"; ``None`` if there is no such label or no such
        number. Only the first label occurrence is considered.
    """
    texts = data['text']
    tops = data['top']
    heights = data['height']
    print(texts)  # debug output of every recognised word

    # Locate the first word that contains the label.
    label_idx = next((i for i, word in enumerate(texts) if '보안등' in word), None)
    if label_idx is None:
        return None

    # Bottom edge of the label's bounding box.
    base_y = tops[label_idx] + heights[label_idx]

    # Digit-only words that start clearly below the label (margin of 5).
    candidates = [
        (top, word)
        for top, word in zip(tops, texts)
        if re.fullmatch(r'\d+', word) and top > base_y + 5
    ]
    if not candidates:
        return None

    # The candidate with the smallest top value is the nearest one below.
    return min(candidates, key=lambda cand: cand[0])[1]


def extract_serial_number(text):
    """Return the first ``SERIAL_PATTERN`` match in *text*, or ``None``."""
    match = re.search(SERIAL_PATTERN, text)
    return match.group(0) if match else None


def classify_and_extract():
    """Copy every image under ``SOURCE_DIR`` into a serial / non-serial folder.

    Each ``.jpg``/``.jpeg``/``.png`` file is OCR'd (Korean + English). Files
    whose text contains a serial number are copied to ``SERIAL_FOLDER``, all
    others to ``NON_SERIAL_FOLDER``. A failure on one file is reported and the
    walk continues with the next file.

    NOTE(review): destinations are flat, so files with the same name in
    different sub-folders overwrite each other -- confirm this is acceptable.
    """
    os.makedirs(SERIAL_FOLDER, exist_ok=True)
    os.makedirs(NON_SERIAL_FOLDER, exist_ok=True)

    for root, _, files in os.walk(SOURCE_DIR):
        for filename in files:
            if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue

            img_path = os.path.join(root, filename)
            try:
                # Context manager releases the file handle after OCR.
                with Image.open(img_path) as img:
                    # Mixed Korean + English OCR
                    text = pytesseract.image_to_string(img, lang='eng+kor')

                serial = extract_serial_number(text)
                dest_dir = SERIAL_FOLDER if serial else NON_SERIAL_FOLDER
                shutil.copy2(img_path, os.path.join(dest_dir, filename))

                if serial:
                    print(f"[✓] {filename} → 일련번호: {serial}")
                else:
                    print(f"[ ] {filename} → 일반 사진")
            except Exception as e:
                # Best-effort batch: report the failure and keep going.
                print(f"[!] 오류: {filename} → {e}")


if __name__ == '__main__':
    # classify_and_extract()

    # img_path = "/Volumes/ExSSD/Working/용공추 사진/2,3월 데이터/Pole/20250218_114838.jpg"
    img_path = '/Volumes/ExSSD/Working/용공추 사진/2,3월 데이터/Pole/20250307_153821.jpg'
    # img_path = "/Volumes/ExSSD/Working/용공추 사진/2,3월 데이터/Pole/20250303_121704.jpg"

    # Disabled OpenCV preprocessing experiment (Otsu threshold + closing):
    # image = cv2.imread(img_path, cv2.IMREAD_COLOR)
    # gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # kernel = np.ones((1, 1), np.uint8)
    # denoised = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

    with Image.open(img_path) as image:
        # Word-level OCR data, used to locate the number under the label.
        data = pytesseract.image_to_data(image, output_type=Output.DICT, lang='eng+kor')
        number = find_number_below_security_light_tesseract(data)
        print("보안등 아래 숫자:", number)

        # Full-text OCR with explicit engine / page-segmentation flags.
        custom_config = r'--oem 3 --psm 6'
        text = pytesseract.image_to_string(image, lang='kor+eng', config=custom_config)
        print(text)