# Untitled
# unknown
# python
# a year ago
# 2.3 kB
# 8
# Indexable
import cv2
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image
# Locate the largest dark-on-light region(s) of an image, run PaddleOCR on each
# one, print the recognised text, and save annotated visualisations.

# Initialize PaddleOCR
ocr = PaddleOCR(use_angle_cls=True, lang='en')  # Use appropriate language

# Path to the input image
image_path = "members.png"

# How many of the largest contours to OCR. The script has always kept one;
# raise this value to process more regions.
max_regions = 1

# Step 1: Load the image and preprocess it
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface later as an opaque cvtColor error.
    raise FileNotFoundError(f"Could not read image: {image_path}")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
blurred = cv2.GaussianBlur(gray, (5, 5), 0)  # Apply Gaussian blur
_, thresh = cv2.threshold(blurred, 127, 255, cv2.THRESH_BINARY_INV)  # Threshold the image

# Step 2: Find contours on the binary mask. findContours treats every non-zero
# pixel as foreground, so the thresholded image (not the blurred grayscale one)
# must be passed for the contours to follow the dark regions.
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Step 3: Sort contours by area and keep the largest `max_regions`
sorted_contours = sorted(contours, key=cv2.contourArea, reverse=True)[:max_regions]

# Step 4: Extract bounding boxes of the kept contours
bounding_boxes = [cv2.boundingRect(contour) for contour in sorted_contours]

# Draw the bounding boxes on a copy, so the crops handed to OCR below do not
# contain the green rectangle borders.
visualization = image.copy()
for (x, y, w, h) in bounding_boxes:
    cv2.rectangle(visualization, (x, y), (x + w, y + h), (0, 255, 0), 2)  # Green box

# Save the visualization image
cv2.imwrite("bounding_boxes.jpg", visualization)

# Step 5: Perform OCR on each bounding box
rois = []
results = []
for (x, y, w, h) in bounding_boxes:
    # Crop the region of interest from the untouched source image
    roi = image[y:y + h, x:x + w]
    # Perform OCR
    result = ocr.ocr(roi, cls=True)
    rois.append(roi)
    results.append(result)

# Print OCR results
for idx, result in enumerate(results):
    print(f"Bounding Box {idx + 1}:")
    # PaddleOCR yields None for an image in which no text was detected.
    ocr_lines = (result[0] if result else None) or []
    for line in ocr_lines:
        print(f"Detected Text: {line[1][0]} | Confidence: {line[1][1]}")

# Optional: Visualize results (using PIL and PaddleOCR draw functions)
for idx, (roi, result) in enumerate(zip(rois, results)):
    ocr_lines = (result[0] if result else None) or []
    if not ocr_lines:
        # Nothing was recognised in this region, so there is nothing to annotate.
        continue
    roi_boxes = [line[0] for line in ocr_lines]
    roi_texts = [line[1][0] for line in ocr_lines]
    roi_scores = [line[1][1] for line in ocr_lines]
    # OpenCV stores pixels as BGR while PIL expects RGB; convert so the
    # annotated output keeps the original colours. Each box is drawn on its
    # own crop because the OCR coordinates are relative to that crop.
    image_pil = Image.fromarray(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB))
    # Draw OCR results
    annotated_image = draw_ocr(image_pil, roi_boxes, roi_texts, roi_scores, font_path='arial.ttf')
    annotated_image = Image.fromarray(annotated_image)
    annotated_image.save(f"ocr_result_box_{idx + 1}.jpg")
# Editor is loading...
# Leave a Comment