# Untitled

import os
import re

import cv2
import numpy as np
import pytesseract
import pyttsx3
from gpiozero import Button  # GPIO button support
from PIL import Image

# Initialize button
button2 = Button(17)  # Button connected to GPIO 17

# Initialize text-to-speech (eSpeak)
engine = pyttsx3.init()
engine.setProperty('rate', 125)
engine.setProperty('voice', 'en+f3')  # Set to female voice

# Load object detection model
prototxt_path = "MobileNetSSD_deploy.prototxt"
model_path = "MobileNetSSD_deploy.caffemodel"

# Both model files are required. Stop with a non-zero exit status so callers
# (shell scripts, service managers) can tell the start-up failed. SystemExit is
# raised directly because the bare exit() helper is only injected by the
# `site` module and is not guaranteed to exist.
if not os.path.exists(prototxt_path) or not os.path.exists(model_path):
    print("Error: Model files not found!")
    raise SystemExit(1)

net = cv2.dnn.readNetFromCaffe(prototxt_path, model_path)

# List of class labels for MobileNet SSD; the position in this list is the
# class index that the detection loop looks up.
class_labels = ["background", "aeroplane", "bicycle", "bird", "boat",
                "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
                "dog", "horse", "motorbike", "person", "pottedplant",
                "sheep", "sofa", "train", "tvmonitor"]

# List of Philippine Peso denominations (kept as strings because they are
# matched against OCR text)
peso_denominations = ["20", "50", "100", "200", "500", "1000"]

# Open the default camera (device index 0) and fail early if it is unavailable,
# instead of failing later on the first frame read.
webcam = cv2.VideoCapture(0)
if not webcam.isOpened():
    print("Error: Could not open camera!")
    raise SystemExit(1)

def _find_denominations(text, denominations):
    """Return the entries of *denominations* that occur in *text* as whole numbers.

    Matching is done on complete digit runs rather than substrings, so the
    text "1000" yields only "1000" and not also "100". The result keeps the
    order of *denominations*.
    """
    numbers_in_text = set(re.findall(r"\d+", text))
    return [denom for denom in denominations if denom in numbers_in_text]


def _announce(message):
    """Queue *message* on the speech engine and echo it to the console."""
    engine.say(message)
    print(message)


def _detect_objects(frame):
    """Run the MobileNet SSD network on *frame* and return the detected labels.

    One label is returned per detection whose confidence exceeds 0.3, so the
    same label can appear several times.
    """
    blob = cv2.dnn.blobFromImage(frame, 0.007843, (300, 300), 127.5)
    net.setInput(blob)
    detections = net.forward()

    # For detection i, index 1 holds the class index and index 2 the confidence.
    return [
        class_labels[int(detections[0, 0, i, 1])]
        for i in range(detections.shape[2])
        if detections[0, 0, i, 2] > 0.3  # Lowered threshold to detect more objects
    ]


def capture_image():
    """Capture one frame and speak the text, objects and peso bills found in it.

    The frame is saved to 'saved_img.jpg', OCR is run on that file, the
    object detector is run on the frame, and the OCR text is checked for
    Philippine Peso denominations. Every result is printed and queued for
    speech, then spoken in one go. If the camera returns no frame, that is
    announced and nothing else is done.
    """
    print("Capturing image...")
    check, frame = webcam.read()
    if not check:
        # Report the failure out loud as well; a silent return gives the user
        # no indication that the button press did anything.
        _announce("Failed to capture image.")
        engine.runAndWait()
        return

    cv2.imwrite(filename='saved_img.jpg', img=frame)
    print("Image saved!")

    # Perform OCR to detect text
    text_detected = pytesseract.image_to_string('saved_img.jpg').strip()
    print("OCR Detected Text:", text_detected)

    # Speak the detected text
    if text_detected:
        engine.say("I detected the text: " + text_detected)
        print("Speaking detected text: " + text_detected)
    else:
        _announce("No readable text detected.")

    # Speak the detected objects
    detected_objects = _detect_objects(frame)
    if detected_objects:
        _announce("I detected " + ", ".join(detected_objects) + ".")
    else:
        _announce("No objects detected.")

    # Speak the detected Philippine Peso bill
    detected_peso = _find_denominations(text_detected, peso_denominations)
    if detected_peso:
        _announce("I detected a Philippine Peso bill of " + ", ".join(detected_peso) + " pesos.")
    else:
        _announce("No Peso bill detected.")

    # Everything above was only queued; this call actually speaks it.
    engine.runAndWait()

# Assign button2 to capture an image
button2.when_pressed = capture_image  # Button 2 triggers image capture

print("Press Button 2 to capture an image.")

# Live preview loop: show camera frames until interrupted with Ctrl+C or until
# the camera stops delivering frames. The camera and preview window are
# released in `finally`, so they are cleaned up on every exit path.
try:
    while True:
        check, frame = webcam.read()
        if not check:
            # There is no frame to show when the read fails, so stop cleanly
            # rather than passing an empty frame to imshow.
            print("Error: Could not read frame from camera.")
            break

        cv2.imshow("Capturing", frame)
        key = cv2.waitKey(1)

        if key == ord('z'):  # Capture image when 'z' is pressed
            capture_image()
except KeyboardInterrupt:
    pass  # Ctrl+C is the normal way to quit; fall through to the cleanup
finally:
    print("Turning off camera.")
    webcam.release()
    print("Camera off.")
    print("Program ended.")
    cv2.destroyAllWindows()
# Editor is loading...