本文將說明如何去辨識出圖片文字位置及高寬。
import cv2
import numpy as np
def _merge_close_boxes(boxes, x_tolerance):
    """Merge x-sorted ``(x, y, w, h)`` boxes whose left edges are close together.

    Args:
        boxes: Iterable of ``(x, y, w, h)`` tuples already sorted by ``x``.
        x_tolerance: Maximum distance between a box's left edge and the left
            edge of the running group for the box to be merged into it.

    Returns:
        A list of merged ``(x, y, w, h)`` tuples, in the same left-to-right
        order. Empty when ``boxes`` is empty.
    """
    merged = []
    current = None
    for x, y, w, h in boxes:
        if current is not None and abs(x - current[0]) <= x_tolerance:
            # Grow the running box to the true union of both rectangles.
            # Working with the right/bottom edges keeps the result correct
            # even when the incoming box sits above the current one or is
            # fully contained in it.
            left = min(current[0], x)
            top = min(current[1], y)
            right = max(current[0] + current[2], x + w)
            bottom = max(current[1] + current[3], y + h)
            current = (left, top, right - left, bottom - top)
            continue
        if current is not None:
            merged.append(current)
        current = (x, y, w, h)
    # Flush the last pending group (if there was any input at all).
    if current is not None:
        merged.append(current)
    return merged


def read_posion(img, x_tolerance=5):
    """Return bounding boxes of the white objects found in ``img``.

    Connected components are extracted with 8-connectivity, ordered by their
    left edge, and components whose left edges lie within ``x_tolerance``
    pixels of the current group's left edge are merged into one box.

    Args:
        img: Image with a black background and white objects, in a format
            accepted by ``cv2.connectedComponentsWithStats``.
        x_tolerance: Left-edge distance (in pixels) under which two
            components are treated as the same item. Defaults to 5.

    Returns:
        A list of ``(x, y, w, h)`` tuples of plain ints sorted by ``x``.
        The list is empty when the image contains no foreground component.
    """
    num_labels, _, stats, _ = cv2.connectedComponentsWithStats(img, connectivity=8)

    # Label 0 is the background, so start at 1. Each stats row holds five
    # values; only the first four (x, y, w, h) are needed here.
    boxes = sorted(
        (tuple(int(v) for v in stats[label][:4]) for label in range(1, num_labels)),
        key=lambda box: box[0],
    )
    return _merge_close_boxes(boxes, x_tolerance)
# Load the image from the placeholder path; replace it with a real file path.
img = cv2.imread(f'圖片路徑')
if img is None:
    # cv2.imread reports an unreadable or missing file by returning None
    # rather than raising, so fail here with a clear message.
    raise FileNotFoundError('cv2.imread could not read the image file')

# read_posion is given a single-channel image, so convert from BGR first.
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
box = read_posion(gray_img)
for i, data in enumerate(box):
    # read_posion returns (x, y, w, h); unpack in that order so the printed
    # height and width are not swapped.
    x, y, w, h = data
    # Print the position, height and width of each detected item.
    print(f'第{i}個OCR,x:{x},y:{y},h:{h},w:{w}')
# NOTE(review): the statements below repeat the statements of read_posion's
# body, but no `def` header precedes them here, so the final `return` appears
# to sit outside any function. Confirm whether this is a leftover excerpt.
# Label the connected regions of `img` using 8-connectivity.
num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(img, connectivity=8)
components = []
for i in range(1, num_labels): # skip the background
# Keep the first four values of the stats row; the fifth is discarded.
x, y, w, h, _ = stats[i]
components.append((x, y, w, h))
# Order the boxes by their x coordinate.
components.sort(key=lambda c: c[0])
merged_components = []
# NOTE(review): raises IndexError when `components` is empty.
current_component = list(components[0])
for i in range(1, len(components)):
# Merge when the x coordinates differ by at most 5.
if abs(components[i][0] - current_component[0]) <= 5:
current_component[0] = min(current_component[0], components[i][0]) # x: take the minimum
current_component[1] = min(current_component[1], components[i][1]) # y: take the minimum
current_component[2] = max(current_component[2], components[i][2]) # w: take the maximum
# NOTE(review): y was already replaced by the minimum on the line above, so
# this yields (y2 - min_y) + h2 and can be smaller than the current box's
# own extent. Verify this is intended.
current_component[3] = abs(components[i][1] - current_component[1]) + components[i][3] # h: y2 - y1 + h2
else:
# x is further than 5 away: store the finished box and start a new one.
merged_components.append(tuple(current_component[:4]))
current_component = list(components[i][:4])
# Store the last pending box.
merged_components.append(tuple(current_component[:4]))
return merged_components