a month ago
3.0 kB
# Choose the classification threshold that minimises a weighted error cost.
#
# "Effort" is alpha * false_positives + beta * false_negatives.  Precision and
# recall are derived from the same confusion counts with NumPy; an undefined
# ratio (empty denominator) is reported as 0.0, which is the value
# scikit-learn's precision_score / recall_score return by default, but without
# the UndefinedMetricWarning.

import numpy as np

# Example of ground truth and predicted probabilities
y_true = np.array([0, 1, 0, 1, 1, 0, 1, 0])  # Actual labels (0 or 1)
y_pred_prob = np.array([0.1, 0.8, 0.4, 0.9, 0.95, 0.2, 0.85, 0.3])  # Predicted probabilities


def calculate_effort(y_true, y_pred_prob, threshold=0.5, alpha=1, beta=1):
    """Return precision, recall and effort at a single decision threshold.

    Parameters
    ----------
    y_true : array-like of 0/1
        Ground-truth binary labels.
    y_pred_prob : array-like of float
        Predicted scores, same shape as ``y_true``.
    threshold : float, default 0.5
        A sample is predicted positive when its score is ``>= threshold``.
    alpha : number, default 1
        Cost of one false positive.
    beta : number, default 1
        Cost of one false negative.

    Returns
    -------
    tuple
        ``(precision, recall, effort)`` where
        ``effort = alpha * false_positives + beta * false_negatives``.
        Precision (recall) is 0.0 when nothing is predicted (actually)
        positive.

    Raises
    ------
    ValueError
        If the inputs differ in shape or ``y_true`` holds values other
        than 0 and 1.
    """
    y_true = np.asarray(y_true)
    y_pred_prob = np.asarray(y_pred_prob)

    # Checked explicitly: NumPy broadcasting would otherwise let mismatched
    # inputs through and silently produce meaningless counts.
    if y_true.shape != y_pred_prob.shape:
        raise ValueError(
            "y_true and y_pred_prob must have the same shape, got "
            f"{y_true.shape} and {y_pred_prob.shape}"
        )
    if not np.isin(y_true, (0, 1)).all():
        raise ValueError("y_true must contain only the binary labels 0 and 1")

    predicted_positive = y_pred_prob >= threshold
    actual_positive = y_true == 1

    true_positives = int(np.count_nonzero(predicted_positive & actual_positive))
    false_positives = int(np.count_nonzero(predicted_positive & ~actual_positive))
    false_negatives = int(np.count_nonzero(~predicted_positive & actual_positive))

    n_predicted_positive = true_positives + false_positives
    n_actual_positive = true_positives + false_negatives
    precision = true_positives / n_predicted_positive if n_predicted_positive else 0.0
    recall = true_positives / n_actual_positive if n_actual_positive else 0.0

    # Effort is the weighted sum of the two kinds of mistakes.
    effort = alpha * false_positives + beta * false_negatives
    return precision, recall, effort


def find_best_threshold(y_true, y_pred_prob, alpha=1, beta=1, num_thresholds=100):
    """Scan evenly spaced thresholds in [0, 1] and pick the minimal-effort one.

    Parameters
    ----------
    y_true, y_pred_prob, alpha, beta
        Forwarded to :func:`calculate_effort`.
    num_thresholds : int, default 100
        Number of evenly spaced candidate thresholds between 0 and 1
        (both ends included).

    Returns
    -------
    tuple
        ``(best_threshold, best_precision, best_recall, best_effort,
        thresholds, efforts, precisions, recalls)``.  The last four are
        arrays with one entry per candidate threshold.  When several
        thresholds tie for the minimal effort, the lowest one is returned.

    Raises
    ------
    ValueError
        If ``num_thresholds`` is smaller than 1, or the inputs are rejected
        by :func:`calculate_effort`.
    """
    if num_thresholds < 1:
        raise ValueError("num_thresholds must be at least 1")

    # Convert once so the per-threshold calls do not re-parse list inputs.
    y_true = np.asarray(y_true)
    y_pred_prob = np.asarray(y_pred_prob)

    thresholds = np.linspace(0, 1, num_thresholds)
    results = [
        calculate_effort(y_true, y_pred_prob, threshold, alpha, beta)
        for threshold in thresholds
    ]
    precisions, recalls, efforts = (np.array(column) for column in zip(*results))

    # argmin returns the first minimum, i.e. the lowest qualifying threshold.
    best = int(np.argmin(efforts))
    return (
        thresholds[best],
        precisions[best],
        recalls[best],
        efforts[best],
        thresholds,
        efforts,
        precisions,
        recalls,
    )


def _plot_curves(thresholds, precisions, recalls, efforts, best_threshold):
    """Plot precision, recall and effort against the threshold."""
    # Imported here so the threshold search stays usable without matplotlib.
    import matplotlib.pyplot as plt

    plt.figure(figsize=(10, 6))
    plt.plot(thresholds, precisions, label='Precision', color='blue')
    plt.plot(thresholds, recalls, label='Recall', color='green')
    plt.plot(thresholds, efforts, label='Effort', color='red')
    plt.axvline(x=best_threshold, linestyle='--', color='black',
                label=f'Best Threshold: {best_threshold:.2f}')
    plt.xlabel('Threshold')
    plt.ylabel('Value')
    plt.legend()
    plt.title('Precision, Recall, and Effort vs. Threshold')
    plt.show()


def main():
    """Run the example: report the best threshold and plot the curves."""
    (best_threshold, best_precision, best_recall, best_effort,
     thresholds, efforts, precisions, recalls) = find_best_threshold(y_true, y_pred_prob)

    # Print the best threshold and corresponding precision, recall, and effort
    print(f"Best Threshold: {best_threshold:.2f}")
    print(f"Best Precision: {best_precision:.2f}")
    print(f"Best Recall: {best_recall:.2f}")
    print(f"Best Effort: {best_effort:.2f}")

    _plot_curves(thresholds, precisions, recalls, efforts, best_threshold)


if __name__ == "__main__":
    main()
Editor is loading...
Leave a Comment