import tensorflow as tf
import numpy as np
import cv2
import time
import pyttsx3
import os
import google.generativeai as genai
from dotenv import load_dotenv
engine = pyttsx3.init()

def speak(text):
    engine.say(text)
    engine.runAndWait()
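# Optional tuning (illustrative values, not part of the original script);
# both properties belong to the standard pyttsx3 API:
engine.setProperty('rate', 160)    # speaking rate in words per minute
engine.setProperty('volume', 1.0)  # volume on a 0.0-1.0 scale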
# Load the best saved model
model = tf.keras.models.load_model('img_1.h5')
# Define image dimensions
img_height, img_width = 224, 224
# Preprocess a BGR frame from OpenCV into the model's input format
def preprocess_frame(frame):
    # cv2.resize expects (width, height); both are 224 here
    img = cv2.resize(frame, (img_width, img_height))
    img_array = np.expand_dims(img, axis=0)  # Add batch dimension
    img_array = img_array / 255.0            # Rescale pixels to [0, 1]
    return img_array
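# Note (assumption about the training data): OpenCV frames are BGR, while
# most Keras image models are trained on RGB. If img_1.h5 was trained on
# RGB images, convert before preprocessing:
#     frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)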
# Run the model on a single frame and return the predicted class index
def predict_frame(frame):
    processed_frame = preprocess_frame(frame)
    prediction = model.predict(processed_frame, verbose=0)  # verbose=0 avoids per-frame log spam
    predicted_class = np.argmax(prediction, axis=1)
    return predicted_class
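# Optional variant (a sketch, not in the original): skip low-confidence
# frames so ambiguous poses don't pollute the gesture buffer. The 0.6
# threshold is an illustrative value.
def predict_frame_with_threshold(frame, threshold=0.6):
    prediction = model.predict(preprocess_frame(frame), verbose=0)
    if float(np.max(prediction)) < threshold:
        return None  # caller should ignore this frame
    return int(np.argmax(prediction))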
# Class labels (replace with your actual class names)
class_labels = ['A LOT', 'ABUSE', 'AFRAID', 'AGREE', 'ALL', 'ANGRY', 'ANY', 'ANYTHING', 'APPRECIATE', 'BAD',
'BEAUTIFUL', 'BECOME', 'BED', 'BORED', 'BRING', 'CHAT', 'CLASS', 'COLD', 'COLLEGE_SCHOOL', 'COMB',
'COME', 'CONGRATULATIONS', 'CRYING', 'DARE', 'DIFFERENCE', 'DILEMMA', 'DISAPPOINTED', 'DO', "DON'T CARE",
'ENJOY', 'FAVOUR', 'FEVER', 'FINE', 'FOOD', 'FREE', 'FRIEND', 'FROM', 'GLASS', 'GO', 'GOOD', 'GOT',
'GRATEFUL', 'HAD', 'HAPPENED', 'HAPPY', 'HEAR', 'HEART', 'HELLO_HI', 'HELP', 'HIDING', 'HOW', 'HUNGRY',
'HURT', 'I_ME_MINE_MY', 'KIND', 'KNOW', 'LEAVE', 'LIGHT', 'LIKE', 'LIKE_LOVE', 'MAKE', 'MEAN IT',
'MEDICINE', 'MEET', 'NAME', 'NEED', 'NEVER', 'NICE', 'NOT', 'NOW', 'NUMBER', 'OLD_AGE', 'ON THE WAY',
'ONWARDS', 'OUTSIDE', 'PHONE', 'PLACE', 'PLANNED', 'PLEASE', 'POUR', 'PREPARE', 'PROMISE', 'REALLY',
'REPEAT', 'ROOM', 'SERVE', 'SHIRT', 'SITTING', 'SLEEP', 'SLOWER', 'SO MUCH', 'SOFTLY', 'SOME HOW',
'SOME MORE', 'SOME ONE', 'SOMETHING', 'SORRY', 'SPEAK', 'STOP', 'STUBBORN', 'SURE', 'TAKE CARE',
'TAKE TIME', 'TALK', 'TELL', 'THANK', 'THAT', 'THERE', 'THINGS', 'THINK', 'THIRSTY', 'THIS ONE',
'TIRED', 'TODAY', 'TRAIN', 'TRUST', 'TRUTH', 'TURN ON', 'UNDERSTAND', 'VERY', 'WANT', 'WATER', 'WEAR',
'WELCOME', 'WHAT', 'WHEN', 'WHERE', 'WHO', 'WORRY', 'YOU']
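# Sanity check (assumes the model ends in a softmax over these classes):
assert model.output_shape[-1] == len(class_labels), \
    "class_labels must match the model's output dimension"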
# Open a connection to the webcam
cap = cv2.VideoCapture(0)
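# Defensive check (not in the original script): fail fast if no camera is found
if not cap.isOpened():
    raise RuntimeError("Could not open webcam at device index 0")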
# Track the time of the last captured frame
last_capture_time = time.time()
# Initialize predicted_class_label and gestures list
predicted_class_label = "No action"
gestures = []
# Configure the Google Generative AI client. The key is read from the
# environment (via .env) instead of being hardcoded in source; the
# GOOGLE_API_KEY variable name is a conventional choice, not from the
# original script.
load_dotenv()
api_key = os.getenv('GOOGLE_API_KEY')
genai.configure(api_key=api_key)
# Define generation configuration
generation_config = {'temperature': 0.9, 'top_p': 1, 'top_k': 1, 'max_output_tokens': 100}
# Send a prompt (plus optional context text) to Gemini and return its reply
def get_response(prompt, input_text=''):
    try:
        # Pass generation_config so the settings above actually take effect;
        # a fresh name avoids shadowing the global Keras `model`
        llm = genai.GenerativeModel('gemini-pro', generation_config=generation_config)
        parts = [prompt, input_text] if input_text else [prompt]
        response = llm.generate_content(parts)
        return response.text
    except Exception as e:
        return f"Error: {e}"
# Turn the buffered gesture labels into a likely passenger sentence
def generate_sentences_from_gestures(gestures):
    gesture_string = ", ".join(gestures)
    prompt = ("I am building a real-time sign language translator for a car. "
              "The passenger is deaf and non-verbal and uses sign language to "
              f"communicate with the driver. The signs shown are: {gesture_string}. "
              "Generate one plausible sentence the passenger might be trying to "
              "say, remembering the signs come from the passenger, not the driver.")
    return get_response(prompt)
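# Quick smoke test (commented out; assumes a valid API key is configured and
# uses labels drawn from class_labels above):
# print(generate_sentences_from_gestures(['HELLO_HI', 'WANT', 'WATER', 'PLEASE']))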
while True:
    ret, frame = cap.read()
    if not ret:
        break

    current_time = time.time()
    # Capture one gesture every 4 seconds
    if current_time - last_capture_time >= 4:
        predicted_class = predict_frame(frame)
        predicted_class_label = class_labels[predicted_class[0]]
        gestures.append(predicted_class_label)
        # print(f"Captured gesture: {predicted_class_label}")  # optional debug
        last_capture_time = current_time

    # Overlay the latest prediction on the frame
    cv2.putText(frame, predicted_class_label, (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
    cv2.imshow('Real-Time Action Detection', frame)

    # Once 4 gestures are buffered, translate them and respond
    if len(gestures) == 4:
        generated_sentence = generate_sentences_from_gestures(gestures)
        print("Generated Sentence:")
        print(generated_sentence)

        # Ask the LLM for a conversational reply to the translated sentence
        chatbot_response = get_response(
            "The passenger's message is below. Respond appropriately as an assistant.",
            generated_sentence)
        print("Chatbot Response:")
        print(chatbot_response)

        # Speak the reply; note that runAndWait() blocks, so the video
        # feed pauses while the response is spoken
        speak(chatbot_response)

        # Reset the buffer for the next capture cycle
        gestures = []

    # Break the loop if the 'q' key is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
# Release the webcam and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()