# Untitled
#
# mail@pastecode.io avatar
# unknown
# plain_text
# a month ago
# 4.7 kB
# 2
# Indexable
# Never
import math
import mediapipe as mp
import time
import cv2
from tqdm import tqdm
import numpy as np 

def euclidean_distance(point1, point2):
    """Return the straight-line distance between two 2-D points.

    Args:
        point1: Iterable of exactly two numbers ``(x, y)``.
        point2: Iterable of exactly two numbers ``(x, y)``.

    Returns:
        The Euclidean distance between the points as a float.
    """
    (ax, ay), (bx, by) = point1, point2
    delta_x = bx - ax
    delta_y = by - ay
    return math.sqrt(delta_x ** 2 + delta_y ** 2)

class BlinkDetector:
    """Count eye blinks and their durations from face-mesh landmarks.

    Typical flow per frame: run a face mesh on the frame, convert the result
    to pixel landmarks with :meth:`detect_landmarks`, compute the per-eye
    width/height ratios with :meth:`calculate_blink_ratio`, and feed them to
    :meth:`update_blink_count`.
    """

    # An eye is treated as closed when its width/height ratio exceeds this.
    CLOSED_RATIO_THRESHOLD = 4
    # A closure must last for MORE than this many consecutive updates to be
    # counted as a blink.
    MIN_CLOSED_FRAMES = 4

    def __init__(self):
        self.counter = 0  # consecutive updates with at least one eye closed
        self.total_blinks = 0  # blinks counted so far
        self.blink_start_time = 0  # time at which the current closure began
        self.blink_durations = []  # duration in seconds of each counted blink
        # Landmark indices outlining each eye. Positions 0 and 8 of each list
        # are used as the horizontal extremes and positions 12 and 4 as the
        # vertical extremes (see calculate_blink_ratio).
        self.left_eye_indices = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398]
        self.right_eye_indices = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246]

    def initialize_face_mesh(self, max_num_faces=1, min_detection_confidence=0.6, min_tracking_confidence=0.7):
        """Create a MediaPipe ``FaceMesh`` configured with the given limits.

        Args:
            max_num_faces: Forwarded to ``FaceMesh(max_num_faces=...)``.
            min_detection_confidence: Forwarded to ``FaceMesh``.
            min_tracking_confidence: Forwarded to ``FaceMesh``.

        Returns:
            A new ``mp.solutions.face_mesh.FaceMesh`` instance.
        """
        return mp.solutions.face_mesh.FaceMesh(
            max_num_faces=max_num_faces,
            min_detection_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence
        )

    def detect_landmarks(self, frame, face_mesh_results, draw=False):
        """Convert the first detected face's landmarks to integer pixel coordinates.

        Args:
            frame: Image array; only ``frame.shape[:2]`` (height, width) is
                read, and the frame is drawn on when ``draw`` is true.
            face_mesh_results: Face mesh output whose
                ``multi_face_landmarks`` is non-empty; only the first face
                is used.
            draw: When true, draw a small filled green circle on ``frame``
                at every landmark (mutates ``frame`` in place).

        Returns:
            A list of ``(x, y)`` integer tuples, one per landmark, obtained
            by scaling each landmark's ``x``/``y`` by the frame width/height.
        """
        image_height, image_width = frame.shape[:2]
        landmarks = [
            (int(point.x * image_width), int(point.y * image_height))
            for point in face_mesh_results.multi_face_landmarks[0].landmark
        ]

        if draw:
            for landmark in landmarks:
                cv2.circle(frame, landmark, 2, (0, 255, 0), -1)

        return landmarks

    def calculate_blink_ratio(self, landmarks):
        """Compute the width/height ratio of each eye.

        Args:
            landmarks: Sequence of ``(x, y)`` points indexable by the values
                in ``right_eye_indices`` and ``left_eye_indices``.

        Returns:
            ``[right_eye_ratio, left_eye_ratio]``. A ratio is ``math.inf``
            when the eye's vertical extent is zero.
        """
        right_eye = [landmarks[i] for i in self.right_eye_indices]
        left_eye = [landmarks[i] for i in self.left_eye_indices]

        def calculate_eye_ratio(eye_landmarks):
            horizontal_distance = euclidean_distance(eye_landmarks[0], eye_landmarks[8])
            vertical_distance = euclidean_distance(eye_landmarks[12], eye_landmarks[4])
            if vertical_distance == 0:
                # Integer pixel coordinates can make the upper and lower lid
                # points coincide on a fully closed eye; report an unbounded
                # ratio (i.e. "closed") instead of raising ZeroDivisionError.
                return math.inf
            return horizontal_distance / vertical_distance

        right_eye_ratio = calculate_eye_ratio(right_eye)
        left_eye_ratio = calculate_eye_ratio(left_eye)

        return [right_eye_ratio, left_eye_ratio]

    def update_blink_count(self, eyes_ratio, timestamp=None):
        """Advance the blink state machine by one observation.

        Args:
            eyes_ratio: Two-element sequence of eye ratios as returned by
                :meth:`calculate_blink_ratio`.
            timestamp: Time of this observation in seconds. Defaults to
                ``time.time()``; pass the frame time when processing
                recorded video so durations reflect video time rather than
                processing time.

        Returns:
            ``(total_blinks, blink_durations)`` — the running blink count and
            the list of blink durations in seconds (the same list object
            stored on the instance).
        """
        now = time.time() if timestamp is None else timestamp
        threshold = self.CLOSED_RATIO_THRESHOLD
        eye_closed = eyes_ratio[0] > threshold or eyes_ratio[1] > threshold

        if eye_closed:
            if self.counter == 0:
                self.blink_start_time = now
            self.counter += 1
        else:
            if self.counter > self.MIN_CLOSED_FRAMES:
                self.total_blinks += 1
                self.blink_durations.append(now - self.blink_start_time)
            # Reset on every reopening, not only after a counted blink;
            # otherwise short closures accumulate across separate events and
            # the stale start time inflates the next blink's duration.
            self.counter = 0

        return self.total_blinks, self.blink_durations

def initialize_variables():
    """Build and return a fresh ``BlinkDetector`` with zeroed counters."""
    detector = BlinkDetector()
    return detector

class FaceTrack(BlinkDetector):
    """Blink detector that owns a face mesh and annotates frames with blink stats."""

    def __init__(self):
        super().__init__()
        self.face_mesh = self.initialize_face_mesh()
        self.frame = None  # most recent annotated frame (None until a face is seen)
        self.avg_blink_duration = 0  # mean of blink_durations, in seconds
        self.frames = []  # every annotated frame in which a face was detected
        # Start with the same [total_blinks, blink_durations] shape that
        # predict() stores, so consumers can index it before the first face
        # is detected instead of hitting IndexError on an empty list.
        self.blink_data = [self.total_blinks, self.blink_durations]

    def predict(self, img):
        """Process one BGR frame: detect the face, update blink stats, draw the overlay.

        When a face is found, ``img`` is annotated in place, stored in
        ``self.frame``, appended to ``self.frames``, and ``self.blink_data``
        is set to ``[total_blinks, blink_durations]``. When no face is found,
        nothing is drawn and no state changes.

        Args:
            img: BGR image array; it is converted to RGB for the face mesh.
        """
        rgb_frame = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = self.face_mesh.process(rgb_frame)

        if not results.multi_face_landmarks:
            return

        landmarks = self.detect_landmarks(img, results, draw=True)
        eyes_ratio = self.calculate_blink_ratio(landmarks)
        total_blinks, blink_durations = self.update_blink_count(eyes_ratio)

        self._draw_overlay(img, total_blinks, blink_durations)

        self.frame = img
        self.frames.append(self.frame)
        self.blink_data = [total_blinks, blink_durations]

    def _draw_overlay(self, img, total_blinks, blink_durations):
        """Draw the prompt, blink count and duration stats onto ``img`` in place."""
        cv2.putText(img, "Please blink your eyes", (int(img.shape[1] / 2), 100), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
        cv2.rectangle(img, (20, 120), (290, 160), (0, 0, 0), -1)
        cv2.putText(img, f'Total Blinks: {total_blinks}', (30, 150), cv2.FONT_HERSHEY_COMPLEX, 0.6, (0, 255, 0), 1)

        if not blink_durations:
            # No completed blink yet, so there are no duration stats to show.
            return

        self.avg_blink_duration = sum(blink_durations) / len(blink_durations)
        cv2.rectangle(img, (20, 200), (290, 240), (0, 0, 0), -1)
        cv2.putText(img, f'Avg Blink Duration: {self.avg_blink_duration:.2f}s', (30, 235), cv2.FONT_HERSHEY_COMPLEX, 0.6, (0, 255, 0), 1)

        last_blink_duration = blink_durations[-1]
        cv2.rectangle(img, (20, 240), (290, 280), (0, 0, 0), -1)
        cv2.putText(img, f"Last Blink Duration: {last_blink_duration:.2f}s", (30, 275), cv2.FONT_HERSHEY_COMPLEX, 0.6, (0, 255, 0), 1)

def eye_track_predict(fc, frames, fps):
    """Run blink tracking over a sequence of frames.

    Args:
        fc: Tracker exposing ``predict(frame)`` and a ``blink_data``
            attribute that is either empty or
            ``[total_blinks, blink_durations]``.
        frames: Iterable of image arrays; each is copied before being passed
            to ``fc.predict`` so the caller's frames are not drawn on.
        fps: Frame rate used to turn a frame index into a timestamp in
            seconds (``frame_number / fps``).

    Returns:
        ``(preds, blink_durations)`` where ``preds`` is a list of
        ``[timestamp, total_blinks]`` per frame and ``blink_durations`` is
        the tracker's list of blink durations (empty if no face was ever
        detected).
    """
    preds = []
    for frame_number, frame in enumerate(tqdm(frames)):
        fc.predict(np.copy(frame))
        # blink_data may still be empty if no face has been detected yet;
        # report zero blinks rather than raising IndexError.
        total_blinks = fc.blink_data[0] if fc.blink_data else 0
        preds.append([frame_number / fps, total_blinks])

    blink_durations = fc.blink_data[1] if fc.blink_data else []
    return preds, blink_durations
# Leave a Comment