Untitled

 avatar
unknown
python
5 months ago
2.3 kB
6
Indexable
import cv2
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image

# Initialize PaddleOCR
ocr = PaddleOCR(use_angle_cls=True, lang='en')  # Use appropriate language

# Path to the input image
image_path = "members.png"

# Number of largest contours (by area) to keep as OCR regions
NUM_REGIONS = 1

# Step 1: Load the image and preprocess it
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports an unreadable/missing file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError(f"Could not read image: {image_path!r}")

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
blurred = cv2.GaussianBlur(gray, (5, 5), 0)    # Apply Gaussian blur
# Inverted binary threshold: pixels darker than 127 become foreground (255)
_, thresh = cv2.threshold(blurred, 127, 255, cv2.THRESH_BINARY_INV)

# Step 2: Find contours on the binary mask.
# findContours treats every non-zero pixel as foreground, so it must be given
# the thresholded image rather than the blurred grayscale one.
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Step 3: Sort contours by area and keep the largest NUM_REGIONS
sorted_contours = sorted(contours, key=cv2.contourArea, reverse=True)[:NUM_REGIONS]

# Step 4: Extract bounding boxes (x, y, w, h) of the kept contours
bounding_boxes = [cv2.boundingRect(contour) for contour in sorted_contours]

# Draw the bounding boxes on a copy for visualization, so the green outlines
# do not end up inside the regions that are later cropped from `image` for OCR.
visualization = image.copy()
for (x, y, w, h) in bounding_boxes:
    cv2.rectangle(visualization, (x, y), (x + w, y + h), (0, 255, 0), 2)  # Green box

# Save the visualization image
cv2.imwrite("bounding_boxes.jpg", visualization)

# Step 5: Perform OCR on each bounding box
results = []
for (x, y, w, h) in bounding_boxes:
    # Crop the region of interest
    roi = image[y:y + h, x:x + w]
    # Perform OCR; the raw return value is stored unchanged
    result = ocr.ocr(roi, cls=True)
    results.append(result)

# Print OCR results
for idx, result in enumerate(results):
    print(f"Bounding Box {idx + 1}:")
    # NOTE(review): PaddleOCR appears to return None in place of the line list
    # when no text is detected; guard so iteration does not raise TypeError.
    lines = result[0] if result else None
    if not lines:
        print("No text detected")
        continue
    # Each line is [box_points, (text, confidence)]
    for _box, (text, confidence) in lines:
        print(f"Detected Text: {text} | Confidence: {confidence}")

# Optional: Visualize results (using PIL and PaddleOCR draw functions)
for idx, (x, y, w, h) in enumerate(bounding_boxes):
    # Guard against a missing line list (no text detected in this region)
    lines = results[idx][0] if results[idx] else None
    if not lines:
        continue  # nothing to annotate for this box

    roi_boxes = [line[0] for line in lines]
    roi_texts = [line[1][0] for line in lines]
    roi_scores = [line[1][1] for line in lines]

    # Crop this box's own region: OCR was run on the crop, so the box
    # coordinates are relative to it. OpenCV arrays are BGR, while PIL expects
    # RGB, so swap the channels explicitly before handing the pixels over.
    region = image[y:y + h, x:x + w]
    image_pil = Image.fromarray(cv2.cvtColor(region, cv2.COLOR_BGR2RGB))

    # Draw OCR results
    annotated_image = draw_ocr(image_pil, roi_boxes, roi_texts, roi_scores, font_path='arial.ttf')
    annotated_image = Image.fromarray(annotated_image)
    annotated_image.save(f"ocr_result_box_{idx + 1}.jpg")
Editor is loading...
Leave a Comment