import tensorflow as tf
import numpy as np
import cv2
import time
import pyttsx3
import os
import google.generativeai as genai
from dotenv import load_dotenv
 
engine = pyttsx3.init()
 
def speak(text):
    # Note: runAndWait() blocks until speech finishes, which also freezes
    # the video preview loop below while the response is being spoken
    engine.say(text)
    engine.runAndWait()
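 
# Optional sketch (assumption: only one utterance is in flight at a time, since
# pyttsx3 engines should not be driven from several threads concurrently):
# speaking in a daemon thread keeps the webcam preview responsive.
import threading

def speak_async(text):
    threading.Thread(target=speak, args=(text,), daemon=True).start()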
 
# Load the best saved model
model = tf.keras.models.load_model('img_1.h5')
 
# Define image dimensions
img_height, img_width = 224, 224
 
# Function to preprocess a single BGR frame from the webcam
def preprocess_frame(frame):
    # cv2.resize expects (width, height); both are 224 here, so order is moot
    img = cv2.resize(frame, (img_width, img_height))
    # Note: OpenCV captures BGR; if the model was trained on RGB images,
    # convert first with cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_array = np.expand_dims(img, axis=0)  # Add batch dimension
    img_array = img_array / 255.0  # Rescale pixel values to [0, 1]
    return img_array
 
# Function to make a prediction on a single frame
def predict_frame(frame):
    processed_frame = preprocess_frame(frame)
    prediction = model.predict(processed_frame, verbose=0)  # verbose=0 silences per-frame logs
    predicted_class = np.argmax(prediction, axis=1)
    return predicted_class
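 
# Optional sketch (the 0.6 threshold is an assumption, not tuned): ignore
# low-confidence frames so idle poses are not mistaken for gestures.
def predict_frame_with_threshold(frame, threshold=0.6):
    processed_frame = preprocess_frame(frame)
    prediction = model.predict(processed_frame, verbose=0)
    if float(np.max(prediction)) < threshold:
        return None  # treat as "no gesture detected"
    return int(np.argmax(prediction, axis=1)[0])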
 
# Class labels (replace with your actual class names)
class_labels = ['A LOT', 'ABUSE', 'AFRAID', 'AGREE', 'ALL', 'ANGRY', 'ANY', 'ANYTHING', 'APPRECIATE', 'BAD',
                'BEAUTIFUL', 'BECOME', 'BED', 'BORED', 'BRING', 'CHAT', 'CLASS', 'COLD', 'COLLEGE_SCHOOL', 'COMB',
                'COME', 'CONGRATULATIONS', 'CRYING', 'DARE', 'DIFFERENCE', 'DILEMMA', 'DISAPPOINTED', 'DO', "DON'T CARE",
                'ENJOY', 'FAVOUR', 'FEVER', 'FINE', 'FOOD', 'FREE', 'FRIEND', 'FROM', 'GLASS', 'GO', 'GOOD', 'GOT',
                'GRATEFUL', 'HAD', 'HAPPENED', 'HAPPY', 'HEAR', 'HEART', 'HELLO_HI', 'HELP', 'HIDING', 'HOW', 'HUNGRY',
                'HURT', 'I_ME_MINE_MY', 'KIND', 'KNOW', 'LEAVE', 'LIGHT', 'LIKE', 'LIKE_LOVE', 'MAKE', 'MEAN IT',
                'MEDICINE', 'MEET', 'NAME', 'NEED', 'NEVER', 'NICE', 'NOT', 'NOW', 'NUMBER', 'OLD_AGE', 'ON THE WAY',
                'ONWARDS', 'OUTSIDE', 'PHONE', 'PLACE', 'PLANNED', 'PLEASE', 'POUR', 'PREPARE', 'PROMISE', 'REALLY',
                'REPEAT', 'ROOM', 'SERVE', 'SHIRT', 'SITTING', 'SLEEP', 'SLOWER', 'SO MUCH', 'SOFTLY', 'SOME HOW',
                'SOME MORE', 'SOME ONE', 'SOMETHING', 'SORRY', 'SPEAK', 'STOP', 'STUBBORN', 'SURE', 'TAKE CARE',
                'TAKE TIME', 'TALK', 'TELL', 'THANK', 'THAT', 'THERE', 'THINGS', 'THINK', 'THIRSTY', 'THIS ONE',
                'TIRED', 'TODAY', 'TRAIN', 'TRUST', 'TRUTH', 'TURN ON', 'UNDERSTAND', 'VERY', 'WANT', 'WATER', 'WEAR',
                'WELCOME', 'WHAT', 'WHEN', 'WHERE', 'WHO', 'WORRY', 'YOU']
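 
# Sanity check (assumes the model ends in a softmax over exactly these labels)
assert model.output_shape[-1] == len(class_labels), \
    "class_labels must match the model's output dimension"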
 
# Open a connection to the webcam and verify it is available
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError("Could not open webcam (device index 0)")
 
# Track the time of the last captured frame
last_capture_time = time.time()
 
# Initialize predicted_class_label and gestures list
predicted_class_label = "No action"
gestures = []
 
# Configure the Google API key (loaded from a .env file; never hardcode secrets)
load_dotenv()
api_key = os.getenv('GOOGLE_API_KEY')  # assumes the key is stored under this name in .env
if not api_key:
    raise RuntimeError("GOOGLE_API_KEY is not set; add it to your .env file")
genai.configure(api_key=api_key)
 
# Define generation configuration
generation_config = {'temperature': 0.9, 'top_p': 1, 'top_k': 1, 'max_output_tokens': 100}
 
def get_response(prompt, input_text):
    try:
        # Use a distinct name so the Keras `model` above is not shadowed,
        # and actually apply the generation_config defined above
        llm = genai.GenerativeModel('gemini-pro', generation_config=generation_config)
        parts = [p for p in (prompt, input_text) if p]  # drop empty parts
        response = llm.generate_content(parts)
        return response.text
    except Exception as e:
        return f"Error: {e}"
 
def generate_sentences_from_gestures(gestures):
    gesture_string = ", ".join(gestures)
    prompt = (f"I am building a real-time sign language translator for use in a car. "
              f"The passenger is deaf and non-verbal, so they use sign language to "
              f"communicate with the driver. The recognized signs are: {gesture_string}. "
              f"Generate one plausible sentence the passenger might be trying to convey "
              f"to the driver; remember the signs come from the passenger, not the driver.")
    response = get_response(prompt, '')
    return response
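 
# Illustrative call (hypothetical gesture sequence; actual LLM output will vary):
#   generate_sentences_from_gestures(['HELLO_HI', 'WATER', 'PLEASE', 'STOP'])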
 
while True:
    ret, frame = cap.read()
    if not ret:
        break
 
    current_time = time.time()
    # Check if 4 seconds have passed since the last capture
    if current_time - last_capture_time >= 4:
        # Predict the class of the current frame
        predicted_class = predict_frame(frame)
        predicted_class_label = class_labels[predicted_class[0]]
        gestures.append(predicted_class_label)
 
        # print(f"Captured gesture: {predicted_class_label}")
 
        # Update the last capture time
        last_capture_time = current_time
 
    # Display the frame with the prediction
    cv2.putText(frame, predicted_class_label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
    cv2.imshow('Real-Time Action Detection', frame)
 
    # Check if we have captured exactly 4 gestures
    if len(gestures) == 4:
        # Generate sentences based on the collected gestures
        generated_sentence = generate_sentences_from_gestures(gestures)
        print("Generated Sentence:")
        print(generated_sentence)
 
        # Ask the LLM to respond to the passenger's sentence
        chatbot_response = get_response(
            "A passenger has communicated the following through sign language. "
            "Respond appropriately as an in-car assistant.",
            generated_sentence)
        print("Chatbot Response:")
        print(chatbot_response)
 
        # Speak the chatbot response
        speak(chatbot_response)
 
        # Reset the gestures list for the next capture cycle
        gestures = []
 
    # Break the loop if 'q' key is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
 
# Release the webcam and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()