import tensorflow as tf
import numpy as np
import cv2
import time
import pyttsx3
import os
import google.generativeai as genai
from dotenv import load_dotenv

# Text-to-speech engine used to speak the chatbot's replies
engine = pyttsx3.init()


def speak(text):
    engine.say(text)
    engine.runAndWait()


# Load the best saved model
model = tf.keras.models.load_model('img_1.h5')

# Define image dimensions
img_height, img_width = 224, 224


# Preprocess a webcam frame for the model.
# Note: OpenCV delivers BGR frames; if the model was trained on RGB images,
# convert first with cv2.cvtColor(frame, cv2.COLOR_BGR2RGB).
def preprocess_frame(frame):
    img = cv2.resize(frame, (img_width, img_height))  # cv2.resize takes (width, height)
    img_array = np.expand_dims(img, axis=0)  # Add batch dimension
    img_array = img_array / 255.0  # Rescale pixel values to [0, 1]
    return img_array


# Run the model on a single frame and return the predicted class index
def predict_frame(frame):
    processed_frame = preprocess_frame(frame)
    prediction = model.predict(processed_frame)
    predicted_class = np.argmax(prediction, axis=1)
    return predicted_class


# Class labels (replace with your actual class names)
class_labels = ['A LOT', 'ABUSE', 'AFRAID', 'AGREE', 'ALL', 'ANGRY', 'ANY', 'ANYTHING',
                'APPRECIATE', 'BAD', 'BEAUTIFUL', 'BECOME', 'BED', 'BORED', 'BRING', 'CHAT',
                'CLASS', 'COLD', 'COLLEGE_SCHOOL', 'COMB', 'COME', 'CONGRATULATIONS', 'CRYING',
                'DARE', 'DIFFERENCE', 'DILEMMA', 'DISAPPOINTED', 'DO', "DON'T CARE", 'ENJOY',
                'FAVOUR', 'FEVER', 'FINE', 'FOOD', 'FREE', 'FRIEND', 'FROM', 'GLASS', 'GO',
                'GOOD', 'GOT', 'GRATEFUL', 'HAD', 'HAPPENED', 'HAPPY', 'HEAR', 'HEART',
                'HELLO_HI', 'HELP', 'HIDING', 'HOW', 'HUNGRY', 'HURT', 'I_ME_MINE_MY', 'KIND',
                'KNOW', 'LEAVE', 'LIGHT', 'LIKE', 'LIKE_LOVE', 'MAKE', 'MEAN IT', 'MEDICINE',
                'MEET', 'NAME', 'NEED', 'NEVER', 'NICE', 'NOT', 'NOW', 'NUMBER', 'OLD_AGE',
                'ON THE WAY', 'ONWARDS', 'OUTSIDE', 'PHONE', 'PLACE', 'PLANNED', 'PLEASE',
                'POUR', 'PREPARE', 'PROMISE', 'REALLY', 'REPEAT', 'ROOM', 'SERVE', 'SHIRT',
                'SITTING', 'SLEEP', 'SLOWER', 'SO MUCH', 'SOFTLY', 'SOME HOW', 'SOME MORE',
                'SOME ONE', 'SOMETHING', 'SORRY', 'SPEAK', 'STOP', 'STUBBORN', 'SURE',
                'TAKE CARE', 'TAKE TIME', 'TALK', 'TELL', 'THANK', 'THAT', 'THERE', 'THINGS',
                'THINK', 'THIRSTY', 'THIS ONE', 'TIRED', 'TODAY', 'TRAIN', 'TRUST', 'TRUTH',
                'TURN ON', 'UNDERSTAND', 'VERY', 'WANT', 'WATER', 'WEAR', 'WELCOME', 'WHAT',
                'WHEN', 'WHERE', 'WHO', 'WORRY', 'YOU']

# Open a connection to the webcam
cap = cv2.VideoCapture(0)

# Track the time of the last captured frame
last_capture_time = time.time()

# Initialize the on-screen label and the list of collected gestures
predicted_class_label = "No action"
gestures = []

# Configure the Google API key. The key is read from a .env file via
# python-dotenv rather than hardcoded; GOOGLE_API_KEY is an assumed
# variable name, so match it to what your .env file defines.
load_dotenv()
api_key = os.getenv('GOOGLE_API_KEY')
genai.configure(api_key=api_key)

# Define generation configuration
generation_config = {'temperature': 0.9, 'top_p': 1, 'top_k': 1, 'max_output_tokens': 100}


def get_response(prompt, input_text):
    try:
        # Named llm to avoid shadowing the Keras model above; generation_config
        # is passed through so the settings defined earlier actually take effect.
        llm = genai.GenerativeModel('gemini-pro', generation_config=generation_config)
        response = llm.generate_content([prompt, input_text])
        return response.text
    except Exception as e:
        return f"Error: {e}"


def generate_sentences_from_gestures(gestures):
    gesture_string = ", ".join(gestures)
    prompt = (
        "I am building a real-time sign language translator for use in a car. "
        "The passenger is deaf and communicates with the driver using sign language. "
        f"The signs shown are: {gesture_string}. Generate a possible sentence the "
        "passenger might be trying to communicate to the driver; remember that the "
        "signs come from the passenger, not the driver."
    )
    response = get_response(prompt, '')
    return response


while True:
    ret, frame = cap.read()
    if not ret:
        break

    current_time = time.time()

    # Capture one gesture every 4 seconds
    if current_time - last_capture_time >= 4:
        predicted_class = predict_frame(frame)
        predicted_class_label = class_labels[predicted_class[0]]
        gestures.append(predicted_class_label)
        last_capture_time = current_time

    # Display the frame with the latest prediction
    cv2.putText(frame, f'{predicted_class_label}', (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
    cv2.imshow('Real-Time Action Detection', frame)

    # Once exactly 4 gestures have been collected, turn them into a sentence
    if len(gestures) == 4:
        generated_sentence = generate_sentences_from_gestures(gestures)
        print("Generated Sentence:")
        print(generated_sentence)

        # Ask the model to reply to the generated sentence as a chatbot
        chatbot_response = get_response(
            "A passenger is communicating through a sign language translator. "
            "Respond appropriately as a bot.",
            generated_sentence)
        print("Chatbot Response:")
        print(chatbot_response)

        # Speak the chatbot response
        speak(chatbot_response)

        # Reset the gesture list for the next capture cycle
        gestures = []

    # Break the loop if 'q' key is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()
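
Setup note (a minimal sketch, assuming a standard pip environment): the script reads the Gemini API key from a .env file placed next to it. GOOGLE_API_KEY is an assumed variable name, so rename it to match whatever your .env actually defines:

GOOGLE_API_KEY=<your Gemini API key>

The imports map to these PyPI packages:

pip install tensorflow opencv-python numpy pyttsx3 google-generativeai python-dotenv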