First commit

This commit is contained in:
2025-09-23 20:59:17 +09:00
commit 37267985b4
14 changed files with 2308 additions and 0 deletions

108
Pole_serial_sorter.py Normal file
View File

@@ -0,0 +1,108 @@
import os
import shutil
import pytesseract
from PIL import Image
import re
from pytesseract import Output
import cv2
import numpy as np
# --- User settings ---
# Top-level working directory; both output folders below are placed under it.
TOP_SOURCE_DIR = '/Volumes/ExSSD/Working/용공추 사진/'
# Folder that classify_and_extract() walks recursively for source images.
SOURCE_DIR = '/Volumes/ExSSD/Working/용공추 사진/Raw_Data_4'
# Output folders: photos with a detected serial number / all other photos.
SERIAL_FOLDER = os.path.join(TOP_SOURCE_DIR, '일련번호_사진')
NON_SERIAL_FOLDER = os.path.join(TOP_SOURCE_DIR, '일반_사진')
# Serial-number regular expression (default: a 5-6 digit number).
#SERIAL_PATTERN = r'\b\d{5,6}\b'
# NOTE(review): the active pattern is a single space, so any OCR text that
# contains a space counts as a "serial number" match. This contradicts the
# documented default above -- confirm whether it is a leftover experiment.
SERIAL_PATTERN = r' '
def find_number_below_security_light_tesseract(data):
    """Return the all-digit OCR token nearest below a "보안등" label.

    Args:
        data: Mapping with parallel lists under the keys 'text', 'top' and
            'height', one entry per OCR token (the structure returned by
            ``pytesseract.image_to_data(..., output_type=Output.DICT)``).

    Returns:
        The text of the digit-only token whose top coordinate is the
        smallest among those lying more than 5 units below the bottom edge
        of a token containing "보안등"; None if no label token is found or
        no label token has a number below it.
    """
    texts = data['text']
    tops = data['top']
    heights = data['height']

    for label_text, label_top, label_height in zip(texts, tops, heights):
        if '보안등' not in label_text:
            continue

        # Bottom edge of the label plus a 5-unit margin: a number must start
        # strictly below this line to count as "under" the label.
        threshold = label_top + label_height + 5
        candidates = [
            (top, text)
            for top, text in zip(tops, texts)
            if top > threshold and re.fullmatch(r'\d+', text)
        ]
        if candidates:
            # min() returns the first minimal element, i.e. the same token a
            # stable sort by top followed by [0] would select.
            return min(candidates, key=lambda item: item[0])[1]
        # No number under this occurrence: keep scanning, because a later
        # token may contain the label again and have a number below it.

    return None
def extract_serial_number(text):
    """Return the first SERIAL_PATTERN match found in *text*, or None."""
    found = re.findall(SERIAL_PATTERN, text)
    if not found:
        return None
    return found[0]
def classify_and_extract():
    """Sort every image under SOURCE_DIR into the serial / non-serial folder.

    Walks SOURCE_DIR recursively, runs Tesseract OCR (English + Korean) on
    each .jpg/.jpeg/.png file and copies the file to SERIAL_FOLDER when
    extract_serial_number() finds a match in the recognised text, otherwise
    to NON_SERIAL_FOLDER. One status line is printed per file. A failure on
    a single file is reported and does not stop the run.
    """
    os.makedirs(SERIAL_FOLDER, exist_ok=True)
    os.makedirs(NON_SERIAL_FOLDER, exist_ok=True)

    for root, _, files in os.walk(SOURCE_DIR):
        for file in files:
            if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue

            img_path = os.path.join(root, file)
            try:
                # The context manager releases the file handle after OCR;
                # without it one handle per image stays open during the walk.
                with Image.open(img_path) as img:
                    # Mixed Korean + English OCR.
                    text = pytesseract.image_to_string(img, lang='eng+kor')

                serial = extract_serial_number(text)
                # NOTE: the destination uses only the base name, so files
                # with the same name in different sub-folders overwrite
                # each other.
                if serial:
                    dest_path = os.path.join(SERIAL_FOLDER, file)
                    shutil.copy2(img_path, dest_path)
                    print(f"[✓] {file} → 일련번호: {serial}")
                else:
                    dest_path = os.path.join(NON_SERIAL_FOLDER, file)
                    shutil.copy2(img_path, dest_path)
                    print(f"[ ] {file} → 일반 사진")
            except Exception as e:
                # Best-effort batch job: report the file and keep going.
                print(f"[!] 오류: {file} → {e}")
if __name__ == '__main__':
    # Ad-hoc single-image experiment; the batch run is currently disabled.
    # classify_and_extract()

    # Sample images (only one is active at a time).
    #img_path = "/Volumes/ExSSD/Working/용공추 사진/2,3월 데이터/Pole/20250218_114838.jpg"
    img_path = '/Volumes/ExSSD/Working/용공추 사진/2,3월 데이터/Pole/20250307_153821.jpg'
    #img_path ="/Volumes/ExSSD/Working/용공추 사진/2,3월 데이터/Pole/20250303_121704.jpg"
    pole_image = Image.open(img_path)

    # Disabled OpenCV preprocessing experiment
    # (grayscale -> Otsu threshold -> morphological close):
    # image = cv2.imread(img_path, cv2.IMREAD_COLOR)
    # gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # kernel = np.ones((1, 1), np.uint8)
    # denoised = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

    # Token-level OCR with positions, used to find the number under the label.
    ocr_data = pytesseract.image_to_data(
        pole_image,
        output_type=Output.DICT,
        lang='eng+kor',
    )
    print("보안등 아래 숫자:", find_number_below_security_light_tesseract(ocr_data))

    # Plain-text OCR of the same image with an explicit engine / page-seg config.
    print(
        pytesseract.image_to_string(
            pole_image,
            lang='kor+eng',
            config=r'--oem 3 --psm 6',
        )
    )