import streamlit as st
import tensorflow as tf
import numpy as np
import cv2
import time
import pyttsx3
import os
from PIL import Image
from dotenv import load_dotenv
import google.generativeai as genai
 
# Streamlit app configuration
st.set_page_config(
    page_title="Sign Language Detection Bot",
    page_icon="",
    layout="wide",
    initial_sidebar_state="expanded",
)
 
# Sidebar options
st.sidebar.title("Sign Language Detection Bot")
 
 
input_option = st.sidebar.selectbox(
    "Choose input method",
    ["Upload Images", "Use Webcam", "Capture from Webcam"]
)
 
# Load the best saved model; stop the app if it cannot be loaded,
# since every code path below depends on it
try:
    model = tf.keras.models.load_model('mod_gd.h5')
except Exception as e:
    st.error(f"Error loading the model: {e}")
    st.stop()

 
# Define image dimensions
img_height, img_width = 224, 224
 
# Preprocess image function
def preprocess_frame(frame):
    img = cv2.resize(frame, (img_height, img_width))
    img_array = np.expand_dims(img, axis=0)  # Add batch dimension
    img_array = img_array / 255.0  # Rescale the image
    return img_array
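
# Note (assumption): OpenCV delivers frames in BGR channel order while PIL
# images are RGB, so the upload and webcam paths below feed the model
# different channel orders. If 'mod_gd.h5' was trained on RGB images, webcam
# frames should be converted first, e.g.:
#
#     rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
#     predict_frame(rgb)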
 
# Predict class from image
def predict_frame(frame):
    processed_frame = preprocess_frame(frame)
    prediction = model.predict(processed_frame)
    predicted_class = np.argmax(prediction, axis=1)
    return predicted_class
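
# Optional sketch: also expose the model's confidence. This assumes the
# network's final layer is a softmax, so each prediction row sums to 1.
def predict_frame_with_confidence(frame):
    processed_frame = preprocess_frame(frame)
    prediction = model.predict(processed_frame)              # shape (1, num_classes)
    predicted_class = int(np.argmax(prediction, axis=1)[0])  # top class index
    confidence = float(np.max(prediction))                   # top softmax probability
    return predicted_class, confidence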
 
# Class labels (replace with your actual names)
class_labels = ['A LOT', 'ABUSE', 'AFRAID', 'AGREE', 'ALL', 'ANGRY', 'ANY', 'ANYTHING', 'APPRECIATE', 'BAD',
                'BEAUTIFUL', 'BECOME', 'BED', 'BORED', 'BRING', 'CHAT', 'CLASS', 'COLD', 'COLLEGE_SCHOOL', 'COMB',
                'COME', 'CONGRATULATIONS', 'CRYING', 'DARE', 'DIFFERENCE', 'DILEMMA', 'DISAPPOINTED', 'DO',
                "DON'T CARE", 'ENJOY', 'FAVOUR', 'FEVER', 'FINE', 'FOOD', 'FREE', 'FRIEND', 'FROM', 'GLASS', 'GO',
                'GOOD', 'GOT', 'GRATEFUL', 'HAD', 'HAPPENED', 'HAPPY', 'HEAR', 'HEART', 'HELLO_HI', 'HELP', 'HIDING',
                'HOW', 'HUNGRY', 'HURT', 'I_ME_MINE_MY', 'KIND', 'KNOW', 'LEAVE', 'LIGHT', 'LIKE', 'LIKE_LOVE', 'MAKE',
                'MEAN IT', 'MEDICINE', 'MEET', 'NAME', 'NEED', 'NEVER', 'NICE', 'NOT', 'NOW', 'NUMBER', 'OLD_AGE',
                'ON THE WAY', 'ONWARDS', 'OUTSIDE', 'PHONE', 'PLACE', 'PLANNED', 'PLEASE', 'POUR', 'PREPARE',
                'PROMISE', 'REALLY', 'REPEAT', 'ROOM', 'SERVE', 'SHIRT', 'SITTING', 'SLEEP', 'SLOWER', 'SO MUCH',
                'SOFTLY', 'SOME HOW', 'SOME MORE', 'SOME ONE', 'SOMETHING', 'SORRY', 'SPEAK', 'STOP', 'STUBBORN',
                'SURE', 'TAKE CARE', 'TAKE TIME', 'TALK', 'TELL', 'THANK', 'THAT', 'THERE', 'THINGS', 'THINK',
                'THIRSTY', 'THIS ONE', 'TIRED', 'TODAY', 'TRAIN', 'TRUST', 'TRUTH', 'TURN ON', 'UNDERSTAND', 'VERY',
                'WANT', 'WATER', 'WEAR', 'WELCOME', 'WHAT', 'WHEN', 'WHERE', 'WHO', 'WORRY', 'YOU']
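
# Note (assumption): this list must match the class-index order fixed at
# training time (e.g. the alphabetical folder order assigned by Keras'
# flow_from_directory); a mismatch silently mislabels every prediction.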
 
# Load the API key securely from environment variables (".env" file);
# never hard-code secrets in source
load_dotenv()
api_key = os.getenv("GOOGLE_API_KEY")

# Guard against a missing API key before configuring the client
if not api_key:
    st.error('Please set GOOGLE_API_KEY in the ".env" file.')
    st.stop()

genai.configure(api_key=api_key)
 
# Generation configuration for the Gemini model
generation_config = {'temperature': 0.9, 'top_p': 1, 'top_k': 1, 'max_output_tokens': 100}

def get_response(prompt, input_text):
    """Send the prompt and optional context to Gemini and return the reply text."""
    try:
        llm = genai.GenerativeModel('gemini-pro', generation_config=generation_config)
        # Drop empty parts so the API is not sent blank content
        parts = [p for p in (prompt, input_text) if p]
        response = llm.generate_content(parts)
        return response.text
    except Exception as e:
        return f"Error: {e}"
 
def generate_sentences_from_gestures(gestures):
    gesture_string = ", ".join(gestures)
    prompt = (f"I am building real-time sign language translation inside a car. The passenger is deaf "
              f"and communicates with the driver in sign language. The signs shown are: {gesture_string}. "
              f"Generate a possible sentence the passenger might be trying to communicate to the driver; "
              f"remember that the signs come from the passenger, not the driver.")
    response = get_response(prompt, '')
    return response
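
# Illustrative example (actual output varies with the LLM):
#   generate_sentences_from_gestures(['HELLO_HI', 'I_ME_MINE_MY', 'WANT', 'WATER'])
#   might yield something like "Hello, I want some water."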
 
# Text-to-speech engine
# engine = pyttsx3.init()
 
# def speak(text):
#     if text:
#         engine.say(text)
#         # Workaround for RuntimeError: run loop already started
#         engine.iterate()
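
# A minimal working sketch of the TTS helper, assuming pyttsx3's standard
# blocking API: runAndWait() drives the event loop to completion, avoiding the
# "run loop already started" error that engine.iterate() was working around.
def speak(text):
    if text:
        tts = pyttsx3.init()  # a fresh engine per call sidesteps loop-state issues
        tts.say(text)
        tts.runAndWait()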
 
# Main UI logic
st.title("Sign Language Detection Bot")
st.write(
    "Upload four images of sign language gestures or use your webcam to capture them in real-time. "
    "The bot will detect the signs, generate a sentence, and provide a response.")
 
gestures = []
if input_option == "Upload Images":  
    uploaded_files = st.file_uploader("Choose four images", accept_multiple_files=True, type=["jpg", "png", "jpeg"])
 
    if uploaded_files and len(uploaded_files) == 4:
        for uploaded_file in uploaded_files:
            image = Image.open(uploaded_file)
            frame = np.array(image)
            predicted_class = predict_frame(frame)
            predicted_class_label = class_labels[predicted_class[0]]
            gestures.append(predicted_class_label)
 
            s = f"<p style='font-size:24px;'>Detected sign: {predicted_class_label}</p>"
            st.markdown(s, unsafe_allow_html=True)
            st.image(image, use_column_width=True)
 
        if len(gestures) == 4:
            generated_sentence = generate_sentences_from_gestures(gestures)
            st.write("Generated Sentence:")
            st.write(generated_sentence)
 
            chatbot_response = get_response(
                "The passenger is communicating through translated sign language. "
                "Respond appropriately as an assistant bot.",
                generated_sentence)
            st.write("Chatbot Response:")
            st.write(chatbot_response)
 
            # speak(chatbot_response)  # uncomment to voice the reply aloud
 
elif input_option == "Use Webcam":
    st.write("Webcam live detection. Show gestures to the webcam.")
    cap = cv2.VideoCapture(0)
    last_capture_time = time.time()
    captured_images = []
    capture_interval = 3  # Interval between each image capture in seconds
    wait_after_response = 5  # Wait time after displaying chatbot response in seconds
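
    # Note: this while-loop blocks Streamlit's script-rerun model. It is fine
    # for a quick demo, but a long-running app would typically use
    # st.camera_input or the streamlit-webrtc component for live video instead.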
 
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
 
        current_time = time.time()
        if current_time - last_capture_time >= capture_interval and len(captured_images) < 4:
            captured_images.append(frame)
            last_capture_time = current_time
 
            st.image(frame, channels="BGR", use_column_width=True)
 
            if len(captured_images) == 4:
                gestures = []
                for captured_image in captured_images:
                    predicted_class = predict_frame(captured_image)
                    predicted_class_label = class_labels[predicted_class[0]]
                    gestures.append(predicted_class_label)
 
                if len(gestures) == 4:
                    generated_sentence = generate_sentences_from_gestures(gestures)
                    st.write("Sentence:")
                    st.write(generated_sentence)
 
                    chatbot_response = get_response(
                        "The passenger is deaf and is communicating through translated sign language. "
                        "Respond appropriately as an assistant bot, in meaningful sentences.",
                        generated_sentence)
                    st.write("Chatbot Response:")
                    st.write(chatbot_response)
 
                    time.sleep(wait_after_response)  # Wait after displaying chatbot response
 
                captured_images = []
 
        # Note: cv2.waitKey() only sees key presses when an OpenCV window
        # (cv2.imshow) has focus, so this 'q' check is inert under Streamlit;
        # it is kept for running the capture loop standalone.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
 
    cap.release()
    cv2.destroyAllWindows()
 
 
elif input_option == "Capture from Webcam":
    st.write("Take four pictures using your webcam.")
    img_file_buffers = []
    for i in range(4):
        img_file_buffer = st.camera_input(f"Take picture {i + 1}")
        if img_file_buffer is not None:
            img_file_buffers.append(img_file_buffer)
 
    if len(img_file_buffers) == 4:
        for img_file_buffer in img_file_buffers:
            img = Image.open(img_file_buffer)
            img_array = np.array(img)
 
            predicted_class = predict_frame(img_array)
            predicted_class_label = class_labels[predicted_class[0]]
            gestures.append(predicted_class_label)
 
            st.image(img, caption=f"Detected sign: {predicted_class_label}", use_column_width=True)
 
        if len(gestures) == 4:
            generated_sentence = generate_sentences_from_gestures(gestures)
            st.write("Generated Sentence:")
            st.write(generated_sentence)
 
            chatbot_response = get_response(
                "The passenger is communicating through translated sign language. "
                "Respond appropriately as an assistant bot.",
                generated_sentence)
            st.write("Chatbot Response:")
            st.write(chatbot_response)
 
            # speak(chatbot_response)  # uncomment to voice the reply aloud