108 lines
3.8 KiB
Python
108 lines
3.8 KiB
Python
import os
|
|
import shutil
|
|
import pytesseract
|
|
from PIL import Image
|
|
import re
|
|
from pytesseract import Output
|
|
import cv2
|
|
import numpy as np
|
|
|
|
# --- User settings ---

# Root folder holding the original photos, and the subfolder that is scanned.
TOP_SOURCE_DIR = '/Volumes/ExSSD/Working/용공추 사진/'
SOURCE_DIR = os.path.join(TOP_SOURCE_DIR, 'Raw_Data_4')

# Output folders, created directly under the root folder.
SERIAL_FOLDER = os.path.join(TOP_SOURCE_DIR, '일련번호_사진')
NON_SERIAL_FOLDER = os.path.join(TOP_SOURCE_DIR, '일반_사진')

# Serial-number regular expression.
# NOTE(review): the active pattern is a single space, so any OCR text that
# contains a space counts as a match. The 5-6 digit pattern
# r'\b\d{5,6}\b' is the disabled alternative -- confirm which is intended.
SERIAL_PATTERN = r' '
|
|
|
|
def find_number_below_security_light_tesseract(data):
    """Return the digit-only OCR token closest below a "보안등" label.

    Args:
        data: Mapping with parallel 'text', 'top' and 'height' sequences,
            one entry per OCR token (the script passes the result of
            ``pytesseract.image_to_data(..., output_type=Output.DICT)``).

    Returns:
        The text of the all-digit token whose top edge is nearest below the
        label (more than 5 px under the label's bottom edge), or None when
        no label token has such a number beneath it.
    """
    texts = data['text']
    tops = data['top']
    heights = data['height']

    # Digit-only tokens do not depend on the label, so collect them once
    # instead of rescanning the token list for every label occurrence.
    numeric_tokens = [
        (top, token)
        for top, token in zip(tops, texts)
        if re.fullmatch(r'\d+', token)
    ]

    for i, text in enumerate(texts):
        if '보안등' not in text:
            continue

        # Bottom edge of the label; the 5 px margin skips tokens that sit on
        # (almost) the same text line as the label.
        base_y = tops[i] + heights[i]
        below = [entry for entry in numeric_tokens if entry[0] > base_y + 5]

        if below:
            # Smallest top == nearest below the label; on ties min() keeps
            # the earliest token, like a stable sort would.
            return min(below, key=lambda entry: entry[0])[1]
        # Nothing under this occurrence: keep looking at later label tokens
        # rather than giving up on the first one.

    return None
|
|
|
|
|
|
def extract_serial_number(text):
    """Return the first SERIAL_PATTERN match found in *text*, or None.

    The value is whatever ``re.findall`` yields for the first match, so a
    pattern with capture groups returns the group(s), not the whole match.
    """
    found = re.findall(SERIAL_PATTERN, text)
    if not found:
        return None
    return found[0]
|
|
|
|
|
|
def classify_and_extract():
    """Copy the images under SOURCE_DIR into serial / non-serial folders.

    Walks SOURCE_DIR recursively and runs Korean+English OCR on every
    .jpg/.jpeg/.png file. A file whose OCR text yields a match from
    extract_serial_number is copied (with metadata, via shutil.copy2) into
    SERIAL_FOLDER; every other image goes to NON_SERIAL_FOLDER. A failure
    on one file is printed and does not stop the run.

    NOTE: files are copied under their bare file name, so same-named files
    from different subfolders overwrite each other in the destination.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(SERIAL_FOLDER, exist_ok=True)
    os.makedirs(NON_SERIAL_FOLDER, exist_ok=True)

    for root, _, files in os.walk(SOURCE_DIR):
        for file in files:
            if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue

            img_path = os.path.join(root, file)

            try:
                # Close the image handle as soon as OCR is done so a large
                # batch does not accumulate open files.
                with Image.open(img_path) as img:
                    # Mixed Korean + English OCR.
                    text = pytesseract.image_to_string(img, lang='eng+kor')
                serial = extract_serial_number(text)

                if serial:
                    dest_path = os.path.join(SERIAL_FOLDER, file)
                    shutil.copy2(img_path, dest_path)
                    print(f"[✓] {file} → 일련번호: {serial}")
                else:
                    dest_path = os.path.join(NON_SERIAL_FOLDER, file)
                    shutil.copy2(img_path, dest_path)
                    print(f"[ ] {file} → 일반 사진")
            except Exception as e:
                # Per-file boundary: report the problem and move on to the
                # next image instead of aborting the whole batch.
                print(f"[!] 오류: {file} → {e}")
|
|
|
|
|
|
if __name__ == '__main__':
    # Batch classification is currently disabled; this entry point only runs
    # a single-image OCR experiment.
    # classify_and_extract()

    # Other sample images that were tried:
    # img_path = "/Volumes/ExSSD/Working/용공추 사진/2,3월 데이터/Pole/20250218_114838.jpg"
    # img_path = "/Volumes/ExSSD/Working/용공추 사진/2,3월 데이터/Pole/20250303_121704.jpg"
    img_path = '/Volumes/ExSSD/Working/용공추 사진/2,3월 데이터/Pole/20250307_153821.jpg'

    # Open the image in a with-block so the file handle is released once
    # both OCR passes are done.
    with Image.open(img_path) as image:
        # Disabled OpenCV preprocessing experiment (grayscale -> Otsu
        # threshold -> morphological close), kept for reference:
        # image = cv2.imread(img_path, cv2.IMREAD_COLOR)
        # gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        # kernel = np.ones((1, 1), np.uint8)
        # denoised = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

        # Pass 1: token-level OCR with positions, used to find the number
        # printed below the "보안등" label.
        data = pytesseract.image_to_data(image, output_type=Output.DICT, lang='eng+kor')

        number = find_number_below_security_light_tesseract(data)
        print("보안등 아래 숫자:", number)

        # Pass 2: plain-text OCR with an explicit engine / page-segmentation
        # configuration, printed for manual inspection.
        custom_config = r'--oem 3 --psm 6'
        text = pytesseract.image_to_string(image, lang='kor+eng', config=custom_config)

        print(text)