# Untitled

import numpy as np
from sklearn.metrics import precision_score, recall_score
import matplotlib.pyplot as plt

# Toy data set: eight samples with their actual class and the model's score.
# Actual labels (0 = negative, 1 = positive)
y_true = np.array(
    [0, 1, 0, 1, 1, 0, 1, 0]
)
# Predicted probability of the positive class, aligned with y_true
y_pred_prob = np.array(
    [0.1, 0.8, 0.4, 0.9, 0.95, 0.2, 0.85, 0.3]
)

# Define a function to calculate precision, recall, and effort at a given threshold
def calculate_effort(y_true, y_pred_prob, threshold=0.5, alpha=1, beta=1):
    """Compute precision, recall, and weighted error "effort" at one threshold.

    Scores greater than or equal to ``threshold`` are predicted as class 1,
    all others as class 0. Ground-truth labels are compared against the
    literal values 0 and 1.

    Args:
        y_true: Array-like of ground-truth labels (0 or 1).
        y_pred_prob: Array-like of predicted scores, same shape as ``y_true``.
        threshold: Decision cut-off applied to ``y_pred_prob``.
        alpha: Cost weight of a single false positive.
        beta: Cost weight of a single false negative.

    Returns:
        A tuple ``(precision, recall, effort)`` where ``effort`` is
        ``alpha * false_positives + beta * false_negatives``. Precision is
        0.0 when nothing is predicted positive and recall is 0.0 when there
        are no positive labels.

    Raises:
        ValueError: If ``y_true`` and ``y_pred_prob`` differ in shape.
    """
    # Coerce so plain Python lists work with the element-wise comparisons.
    y_true = np.asarray(y_true)
    y_pred = (np.asarray(y_pred_prob) >= threshold).astype(int)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred_prob must have the same shape, got "
            f"{y_true.shape} and {y_pred.shape}"
        )

    predicted_positive = y_pred == 1
    actual_positive = y_true == 1

    # One pass over the confusion counts; every metric below derives from them.
    true_positives = int(np.sum(predicted_positive & actual_positive))
    false_positives = int(np.sum(predicted_positive & (y_true == 0)))
    false_negatives = int(np.sum((y_pred == 0) & actual_positive))

    # Undefined ratios (empty denominator) are reported as 0.0 explicitly
    # rather than triggering an undefined-metric warning per threshold.
    predicted_total = true_positives + false_positives
    actual_total = true_positives + false_negatives
    precision = true_positives / predicted_total if predicted_total else 0.0
    recall = true_positives / actual_total if actual_total else 0.0

    # Effort is the weighted count of both kinds of mistake.
    effort = alpha * false_positives + beta * false_negatives

    return precision, recall, effort

# Define function to find the best threshold with minimal effort
def find_best_threshold(y_true, y_pred_prob, alpha=1, beta=1, num_thresholds=100):
    """Scan evenly spaced thresholds in [0, 1] and pick the lowest-effort one.

    Each candidate threshold is scored with ``calculate_effort``. When several
    thresholds share the minimal effort, the smallest of them is returned
    (``np.argmin`` reports the first occurrence).

    Args:
        y_true: Ground-truth labels, forwarded to ``calculate_effort``.
        y_pred_prob: Predicted scores, forwarded to ``calculate_effort``.
        alpha: Cost weight of a false positive.
        beta: Cost weight of a false negative.
        num_thresholds: Number of evenly spaced candidates between 0 and 1
            inclusive. Defaults to 100.

    Returns:
        An 8-tuple ``(best_threshold, best_precision, best_recall,
        best_effort, thresholds, efforts, precisions, recalls)``; the last
        four entries are arrays holding the full scan, aligned by index.

    Raises:
        ValueError: If ``num_thresholds`` is smaller than 1.
    """
    if num_thresholds < 1:
        # An empty grid has no minimum; fail with a clear message instead.
        raise ValueError("num_thresholds must be at least 1")

    thresholds = np.linspace(0, 1, num_thresholds)

    # One (precision, recall, effort) triple per candidate threshold.
    scan = [
        calculate_effort(y_true, y_pred_prob, threshold, alpha, beta)
        for threshold in thresholds
    ]
    precisions, recalls, efforts = (np.array(column) for column in zip(*scan))

    # First index with the minimal effort wins.
    best = np.argmin(efforts)

    return (
        thresholds[best],
        precisions[best],
        recalls[best],
        efforts[best],
        thresholds,
        efforts,
        precisions,
        recalls,
    )

# Run the threshold search on the example data with the default weights
search_result = find_best_threshold(y_true, y_pred_prob)
best_threshold, best_precision, best_recall, best_effort = search_result[:4]
thresholds, efforts, precisions, recalls = search_result[4:]

# Report the winning threshold together with its metrics
print(f"Best Threshold: {best_threshold:.2f}")
print(f"Best Precision: {best_precision:.2f}")
print(f"Best Recall: {best_recall:.2f}")
print(f"Best Effort: {best_effort:.2f}")

# Draw the three curves over the scanned thresholds on a single figure
plt.figure(figsize=(10, 6))
for curve, curve_label, curve_color in (
    (precisions, 'Precision', 'blue'),
    (recalls, 'Recall', 'green'),
    (efforts, 'Effort', 'red'),
):
    plt.plot(thresholds, curve, label=curve_label, color=curve_color)

# Dashed vertical marker at the selected threshold
plt.axvline(x=best_threshold, linestyle='--', color='black', label=f'Best Threshold: {best_threshold:.2f}')
plt.title('Precision, Recall, and Effort vs. Threshold')
plt.xlabel('Threshold')
plt.ylabel('Value')
plt.legend()
plt.show()
# Editor is loading...  (paste-site artifact, not part of the script)