# NOTE: paste-site page header captured together with the code (not part of the script):
# mail@pastecode.io avatar
# 21 days ago
# 4.4 kB
import webbrowser  # Import webbrowser to open YouTube links
from urllib.parse import urlencode  # Import urlencode to build an escaped search URL

import av  # Import av for video frame handling
import cv2  # Import OpenCV for image processing
import mediapipe as mp  # Import MediaPipe for face and hand tracking
import numpy as np  # Import NumPy for numerical operations
import streamlit as st  # Import Streamlit for web app creation
from keras.models import load_model  # Import Keras for loading the emotion model
from streamlit_webrtc import webrtc_streamer  # Import WebRTC for video streaming

# Pre-trained Keras classifier, read from "model.h5" in the working directory
model = load_model("model.h5")
# Lookup array read from "labels.npy"; indexed with the argmax of the model output
label = np.load("labels.npy")

# MediaPipe solution modules: holistic tracking, hand connection constants,
# and the drawing helpers used to overlay landmarks on a frame
holistic, hands = mp.solutions.holistic, mp.solutions.hands
# One shared Holistic tracker instance, reused for every processed frame
holis = holistic.Holistic()
drawing = mp.solutions.drawing_utils

# Create the web app header
st.header("Emotion Based Music Recommender")

# Set initial session state variable for video capture.
# "run" holds the strings "true"/"false" (not booleans); the rest of the
# script compares against those exact strings.
if "run" not in st.session_state:
	st.session_state["run"] = "true"

# Load previously saved emotion (if any). This must happen on every script
# run, not only the first one, because the code below always reads `emotion`.
# A missing, empty, or unreadable file simply means "nothing captured yet".
try:
	emotion = np.load("emotion.npy")[0]
except (OSError, EOFError, ValueError, IndexError):
	emotion = ""

# Keep capturing while no emotion is stored; pause capture once one exists
if not emotion:
	st.session_state["run"] = "true"
else:
	st.session_state["run"] = "false"

# Define a class to process video frames
class EmotionProcessor:
	"""Video frame processor that labels each frame with a predicted emotion.

	``recv`` handles one frame at a time: it mirrors the frame, runs the
	module-level MediaPipe Holistic tracker ``holis`` on it and, when a face
	is found, classifies the face/hand landmark offsets with ``model`` and
	saves the predicted label to "emotion.npy" for the rest of the script.
	"""

	# Landmark index that face offsets are measured from.
	_FACE_ANCHOR = 1
	# Landmark index that hand offsets are measured from.
	_HAND_ANCHOR = 8
	# Number of zeros appended for a hand that was not detected. It has to
	# equal what a detected hand contributes (two values per landmark;
	# MediaPipe hand tracking reports 21 landmarks) so the feature vector
	# always has the same length.
	_HAND_FEATURES = 42

	@staticmethod
	def _relative_xy(landmarks, anchor):
		"""Return [dx0, dy0, dx1, dy1, ...] of every landmark relative to one of them.

		Args:
			landmarks: object whose ``landmark`` sequence holds points with
				``x`` and ``y`` attributes.
			anchor: index of the landmark used as the origin.

		Returns:
			A flat list with two floats per landmark.
		"""
		origin = landmarks.landmark[anchor]
		offsets = []
		for point in landmarks.landmark:
			offsets.append(point.x - origin.x)
			offsets.append(point.y - origin.y)
		return offsets

	def recv(self, frame):
		"""Annotate one video frame and return it.

		Args:
			frame: incoming frame; must provide ``to_ndarray(format="bgr24")``.

		Returns:
			An ``av.VideoFrame`` (bgr24) holding the mirrored image with the
			landmarks drawn on it and, if a face was detected, the predicted
			label written in the top-left area.
		"""
		# Convert to a NumPy image and flip it horizontally (mirror view)
		frm = cv2.flip(frame.to_ndarray(format="bgr24"), 1)

		# The tracker is given an RGB copy of the BGR frame
		res = holis.process(cv2.cvtColor(frm, cv2.COLOR_BGR2RGB))

		# A prediction is only made when a face is visible
		if res.face_landmarks:
			features = self._relative_xy(res.face_landmarks, self._FACE_ANCHOR)

			# Left hand first, then right hand; a missing hand is zero-padded
			for hand in (res.left_hand_landmarks, res.right_hand_landmarks):
				if hand:
					features.extend(self._relative_xy(hand, self._HAND_ANCHOR))
				else:
					features.extend([0.0] * self._HAND_FEATURES)

			# Single-row batch for the model
			features = np.array(features).reshape(1, -1)

			# Highest-scoring class index -> label
			pred = label[np.argmax(model.predict(features))]

			# Write the predicted emotion onto the frame
			cv2.putText(frm, pred, (50, 50), cv2.FONT_ITALIC, 1, (255, 0, 0), 2)

			# Persist the latest prediction so the main script can read it
			np.save("emotion.npy", np.array([pred]))

		# Draw landmarks on the frame
		drawing.draw_landmarks(
			frm,
			res.face_landmarks,
			holistic.FACEMESH_TESSELATION,
			landmark_drawing_spec=drawing.DrawingSpec(color=(0, 0, 255), thickness=-1, circle_radius=1),
		)
		drawing.draw_landmarks(frm, res.left_hand_landmarks, hands.HAND_CONNECTIONS)
		drawing.draw_landmarks(frm, res.right_hand_landmarks, hands.HAND_CONNECTIONS)

		# Return the processed video frame
		return av.VideoFrame.from_ndarray(frm, format="bgr24")

# Get user input for language and singer preferences
lang = st.text_input("Language")
singer = st.text_input("singer")

# Start video streaming if language and singer are provided, and video capture is enabled
# ("run" stores the string "false" once an emotion has been captured)
if lang and singer and st.session_state["run"] != "false":
	webrtc_streamer(
		key="key",
		desired_playing_state=True,
		# Frames are routed through EmotionProcessor.recv; without this
		# factory the stream would play but no emotion would be predicted
		video_processor_factory=EmotionProcessor,
	)

# Create a button to trigger song recommendations
btn = st.button("Recommend me songs")

# If button is clicked, check if emotion is captured and proceed accordingly
if btn:
	if not emotion:
		# Nothing captured yet: ask the user to wait and keep the camera running
		st.warning("Please let me capture your emotion first")
		st.session_state["run"] = "true"
	else:
		# Open YouTube search results with language, emotion, and singer
		# preferences. urlencode escapes the free-text inputs (spaces become
		# "+", characters such as "&" or "#" are percent-encoded) so they
		# cannot break the query string.
		query = urlencode({"search_query": f"{lang} {emotion} song {singer}"})
		webbrowser.open(f"https://www.youtube.com/results?{query}")
		# Clear saved emotion for the next recommendation cycle
		np.save("emotion.npy", np.array([""]))
		# Reset session state variable to pause video streaming
		st.session_state["run"] = "false"
# Leave a Comment  (paste-site page footer, not part of the script)