import streamlit as st
import tensorflow as tf
import numpy as np
import cv2
import time
import pyttsx3
import os
from PIL import Image
from dotenv import load_dotenv
import google.generativeai as genai
import io

# Streamlit app configuration
st.set_page_config(
    page_title="Sign Language Detection Bot",
    page_icon="",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Sidebar options
st.sidebar.title("Sign Language Detection Bot")
input_option = st.sidebar.selectbox(
    "Choose input method", ["Upload Images", "Use Webcam", "Capture from Webcam"]
)

# Load the best saved model; stop the app if it cannot be loaded,
# since every code path below depends on it
try:
    model = tf.keras.models.load_model('mod_gd.h5')
except Exception as e:
    st.error(f"Error loading the model: {e}")
    st.stop()

# Image dimensions expected by the model
img_height, img_width = 224, 224


def preprocess_frame(frame):
    """Resize, batch, and rescale a frame for prediction."""
    img = cv2.resize(frame, (img_height, img_width))
    img_array = np.expand_dims(img, axis=0)  # Add batch dimension
    img_array = img_array / 255.0  # Rescale pixel values to [0, 1]
    return img_array


def predict_frame(frame):
    """Return the predicted class index for a single frame."""
    processed_frame = preprocess_frame(frame)
    prediction = model.predict(processed_frame)
    predicted_class = np.argmax(prediction, axis=1)
    return predicted_class


# Class labels (replace with your actual names)
class_labels = ['A LOT', 'ABUSE', 'AFRAID', 'AGREE', 'ALL', 'ANGRY', 'ANY', 'ANYTHING',
                'APPRECIATE', 'BAD', 'BEAUTIFUL', 'BECOME', 'BED', 'BORED', 'BRING', 'CHAT',
                'CLASS', 'COLD', 'COLLEGE_SCHOOL', 'COMB', 'COME', 'CONGRATULATIONS', 'CRYING',
                'DARE', 'DIFFERENCE', 'DILEMMA', 'DISAPPOINTED', 'DO', "DON'T CARE", 'ENJOY',
                'FAVOUR', 'FEVER', 'FINE', 'FOOD', 'FREE', 'FRIEND', 'FROM', 'GLASS', 'GO',
                'GOOD', 'GOT', 'GRATEFUL', 'HAD', 'HAPPENED', 'HAPPY', 'HEAR', 'HEART',
                'HELLO_HI', 'HELP', 'HIDING', 'HOW', 'HUNGRY', 'HURT', 'I_ME_MINE_MY', 'KIND',
                'KNOW', 'LEAVE', 'LIGHT', 'LIKE', 'LIKE_LOVE', 'MAKE', 'MEAN IT', 'MEDICINE',
                'MEET', 'NAME', 'NEED', 'NEVER', 'NICE', 'NOT', 'NOW', 'NUMBER', 'OLD_AGE',
                'ON THE WAY', 'ONWARDS', 'OUTSIDE', 'PHONE', 'PLACE', 'PLANNED', 'PLEASE',
                'POUR', 'PREPARE', 'PROMISE', 'REALLY', 'REPEAT', 'ROOM', 'SERVE', 'SHIRT',
                'SITTING', 'SLEEP', 'SLOWER', 'SO MUCH', 'SOFTLY', 'SOME HOW', 'SOME MORE',
                'SOME ONE', 'SOMETHING', 'SORRY', 'SPEAK', 'STOP', 'STUBBORN', 'SURE',
                'TAKE CARE', 'TAKE TIME', 'TALK', 'TELL', 'THANK', 'THAT', 'THERE', 'THINGS',
                'THINK', 'THIRSTY', 'THIS ONE', 'TIRED', 'TODAY', 'TRAIN', 'TRUST', 'TRUTH',
                'TURN ON', 'UNDERSTAND', 'VERY', 'WANT', 'WATER', 'WEAR', 'WELCOME', 'WHAT',
                'WHEN', 'WHERE', 'WHO', 'WORRY', 'YOU']
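# Quick sanity check for the helpers above (a sketch, not part of the app):
# it assumes a hypothetical local test image "sample_sign.jpg". Uncomment to try:
# test_frame = np.array(Image.open("sample_sign.jpg").convert("RGB"))
# print("Predicted:", class_labels[predict_frame(test_frame)[0]])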
# Load the API key securely from environment variables (set GOOGLE_API_KEY in a ".env" file)
load_dotenv()
api_key = os.getenv("GOOGLE_API_KEY")

# Guard against a missing API key before configuring the client
if not api_key:
    st.error('Please set your Google Generative AI API key in the ".env" file.')
    st.stop()

genai.configure(api_key=api_key)

# Generation configuration
generation_config = {'temperature': 0.9, 'top_p': 1, 'top_k': 1, 'max_output_tokens': 100}


def get_response(prompt, input_text):
    try:
        # Pass generation_config so the settings above actually take effect
        genai_model = genai.GenerativeModel('gemini-pro', generation_config=generation_config)
        response = genai_model.generate_content([prompt, input_text])
        return response.text
    except Exception as e:
        return f"Error: {e}"


def generate_sentences_from_gestures(gestures):
    gesture_string = ", ".join(gestures)
    prompt = (
        "I am creating a real-time sign language translation system in a car. The passenger "
        "is deaf and non-verbal, so they use sign language to communicate with the driver. "
        f"The actions shown are: {gesture_string}. Generate a possible sentence that the "
        "passenger might be trying to communicate to the driver; remember that the actions "
        "are by the passenger, not the driver."
    )
    response = get_response(prompt, '')
    return response
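# Illustrative flow (a sketch; the gesture list and output are hypothetical):
#   gestures = ['HELLO_HI', 'I_ME_MINE_MY', 'WANT', 'WATER']
#   sentence = generate_sentences_from_gestures(gestures)
#   # sentence might read: "Hello, I want some water." (actual output varies by model)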
# Text-to-speech engine
# engine = pyttsx3.init()
# def speak(text):
#     if text:
#         engine.say(text)
#         # Workaround for RuntimeError: run loop already started
#         engine.iterate()

# Main UI logic
st.title("Sign Language Detection Bot")
# st.write(
#     "Upload four images of sign language gestures or use your webcam to capture them "
#     "in real-time. The bot will detect the signs, generate a sentence, and provide a response.")

gestures = []

if input_option == "Upload Images":
    uploaded_files = st.file_uploader(
        "Choose four images", accept_multiple_files=True, type=["jpg", "png", "jpeg"]
    )
    if len(uploaded_files) == 4:
        for uploaded_file in uploaded_files:
            image = Image.open(uploaded_file)
            frame = np.array(image)
            predicted_class = predict_frame(frame)
            predicted_class_label = class_labels[predicted_class[0]]
            gestures.append(predicted_class_label)
            s = f"<p style='font-size:24px;'>Detected sign: {predicted_class_label}</p>"
            st.markdown(s, unsafe_allow_html=True)
            st.image(image, use_column_width=True)
    if len(gestures) == 4:
        generated_sentence = generate_sentences_from_gestures(gestures)
        st.write("Generated Sentence:")
        st.write(generated_sentence)
        chatbot_response = get_response(
            "The passenger needs to communicate with the LLM model. Respond appropriately "
            "as a bot.",
            generated_sentence)
        st.write("Chatbot Response:")
        st.write(chatbot_response)
        # speak(chatbot_response)
        # engine.runAndWait()

elif input_option == "Use Webcam":
    st.write("Webcam live detection. Show gestures to the webcam.")
    cap = cv2.VideoCapture(0)
    last_capture_time = time.time()
    captured_images = []
    capture_interval = 3  # Interval between each image capture in seconds
    wait_after_response = 5  # Wait time after displaying chatbot response in seconds

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        current_time = time.time()
        if current_time - last_capture_time >= capture_interval and len(captured_images) < 4:
            captured_images.append(frame)
            last_capture_time = current_time
        # Note: OpenCV frames are BGR; convert with cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # before prediction if the model was trained on RGB images
        st.image(frame, channels="BGR", use_column_width=True)
        if len(captured_images) == 4:
            gestures = []
            for captured_image in captured_images:
                predicted_class = predict_frame(captured_image)
                predicted_class_label = class_labels[predicted_class[0]]
                gestures.append(predicted_class_label)
            if len(gestures) == 4:
                generated_sentence = generate_sentences_from_gestures(gestures)
                st.write("Sentence:")
                st.write(generated_sentence)
                chatbot_response = get_response(
                    "The passenger needs to communicate with the LLM model. The passenger is "
                    "deaf and non-verbal, so they use sign language. Respond appropriately as "
                    "a bot, in meaningful sentences.",
                    generated_sentence)
                st.write("Chatbot Response:")
                st.write(chatbot_response)
                time.sleep(wait_after_response)  # Wait after displaying chatbot response
            captured_images = []
        # cv2.waitKey has no effect without an OpenCV window; kept from the original loop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

elif input_option == "Capture from Webcam":
    st.write("Take four pictures using your webcam.")
    img_file_buffers = []
    for i in range(4):
        img_file_buffer = st.camera_input(f"Take picture {i + 1}")
        if img_file_buffer is not None:
            img_file_buffers.append(img_file_buffer)
    if len(img_file_buffers) == 4:
        for img_file_buffer in img_file_buffers:
            img = Image.open(img_file_buffer)
            img_array = np.array(img)
            predicted_class = predict_frame(img_array)
            predicted_class_label = class_labels[predicted_class[0]]
            gestures.append(predicted_class_label)
            st.image(img, caption=f"Detected sign: {predicted_class_label}",
                     use_column_width=True)
    if len(gestures) == 4:
        generated_sentence = generate_sentences_from_gestures(gestures)
        st.write("Generated Sentence:")
        st.write(generated_sentence)
        chatbot_response = get_response(
            "The passenger needs to communicate with the LLM model. Respond appropriately "
            "as a bot.",
            generated_sentence)
        st.write("Chatbot Response:")
        st.write(chatbot_response)
        # speak(chatbot_response)
        # engine.runAndWait()
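# To run the app (assuming this file is saved as app.py, with mod_gd.h5 and a ".env"
# file providing GOOGLE_API_KEY alongside it):
#   streamlit run app.py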