Untitled
unknown
plain_text
a year ago
3.0 kB
6
Indexable
import numpy as np
from sklearn.metrics import precision_score, recall_score
import matplotlib.pyplot as plt
# Toy evaluation set, written one sample per row as (actual label, predicted
# probability of the positive class). The rows are transposed into the two
# parallel arrays used by the rest of the script.
y_true, y_pred_prob = (
    np.array(column)
    for column in zip(
        (0, 0.1),
        (1, 0.8),
        (0, 0.4),
        (1, 0.9),
        (1, 0.95),
        (0, 0.2),
        (1, 0.85),
        (0, 0.3),
    )
)
# Define a function to calculate precision, recall, and effort at a given threshold
def calculate_effort(y_true, y_pred_prob, threshold=0.5, alpha=1, beta=1):
    """Return precision, recall and weighted error cost at one threshold.

    A sample is predicted positive when its probability is greater than or
    equal to ``threshold``. Labels equal to 1 are positives and labels equal
    to 0 are negatives.

    Args:
        y_true: Array-like of actual binary labels (0 or 1).
        y_pred_prob: Array-like of predicted probabilities, same shape as
            ``y_true``.
        threshold: Decision cut-off applied to ``y_pred_prob`` (inclusive).
        alpha: Cost assigned to each false positive.
        beta: Cost assigned to each false negative.

    Returns:
        A ``(precision, recall, effort)`` tuple where
        ``effort = alpha * false_positives + beta * false_negatives``.
        Precision is 0.0 when nothing is predicted positive, and recall is
        0.0 when there are no actual positives.

    Raises:
        ValueError: If ``y_true`` and ``y_pred_prob`` differ in shape.
    """
    y_true = np.asarray(y_true)
    y_pred_prob = np.asarray(y_pred_prob)
    if y_true.shape != y_pred_prob.shape:
        raise ValueError(
            "y_true and y_pred_prob must have the same shape, got "
            f"{y_true.shape} and {y_pred_prob.shape}"
        )

    predicted_positive = y_pred_prob >= threshold
    actual_positive = y_true == 1
    actual_negative = y_true == 0

    # One pass over the confusion counts; every metric below derives from them.
    true_positives = np.sum(predicted_positive & actual_positive)
    false_positives = np.sum(predicted_positive & actual_negative)
    false_negatives = np.sum(~predicted_positive & actual_positive)

    # High thresholds predict no positives at all, which makes precision 0/0.
    # Define that case as 0.0 explicitly instead of dividing and warning.
    predicted_total = true_positives + false_positives
    actual_total = true_positives + false_negatives
    precision = float(true_positives / predicted_total) if predicted_total else 0.0
    recall = float(true_positives / actual_total) if actual_total else 0.0

    # Effort is the weighted sum of both kinds of mistakes.
    effort = alpha * false_positives + beta * false_negatives
    return precision, recall, effort
# Define function to find the best threshold with minimal effort
def find_best_threshold(y_true, y_pred_prob, alpha=1, beta=1, num_thresholds=100):
    """Sweep decision thresholds over [0, 1] and pick the one with least effort.

    Each candidate threshold is scored with ``calculate_effort``. When several
    thresholds share the minimal effort, the lowest of them is returned,
    because ``np.argmin`` reports the first minimum and the grid is ascending.

    Args:
        y_true: Array-like of actual binary labels (0 or 1).
        y_pred_prob: Array-like of predicted probabilities.
        alpha: Cost assigned to each false positive.
        beta: Cost assigned to each false negative.
        num_thresholds: Number of evenly spaced thresholds to evaluate,
            including both end points 0 and 1.

    Returns:
        An 8-tuple ``(best_threshold, best_precision, best_recall,
        best_effort, thresholds, efforts, precisions, recalls)``. The first
        four entries describe the chosen threshold. The last four are arrays
        of length ``num_thresholds`` holding the full sweep.

    Raises:
        ValueError: If ``num_thresholds`` is smaller than 1.
    """
    if num_thresholds < 1:
        raise ValueError("num_thresholds must be at least 1")

    thresholds = np.linspace(0, 1, num_thresholds)

    # Score every candidate once, then transpose the per-threshold triples
    # into one array per metric.
    scores = [
        calculate_effort(y_true, y_pred_prob, threshold, alpha, beta)
        for threshold in thresholds
    ]
    precisions, recalls, efforts = (np.array(column) for column in zip(*scores))

    best = int(np.argmin(efforts))
    return (
        thresholds[best],
        precisions[best],
        recalls[best],
        efforts[best],
        thresholds,
        efforts,
        precisions,
        recalls,
    )
# Run the threshold sweep once; the result carries both the optimum and the
# full per-threshold curves needed for the plot below.
(
    best_threshold,
    best_precision,
    best_recall,
    best_effort,
    thresholds,
    efforts,
    precisions,
    recalls,
) = find_best_threshold(y_true, y_pred_prob)

# Report the operating point with the lowest effort, one metric per line.
print(
    f"Best Threshold: {best_threshold:.2f}",
    f"Best Precision: {best_precision:.2f}",
    f"Best Recall: {best_recall:.2f}",
    f"Best Effort: {best_effort:.2f}",
    sep="\n",
)

# Draw all three curves against the threshold on a shared axis and mark the
# selected threshold with a dashed vertical line.
plt.figure(figsize=(10, 6))
for _values, _name, _colour in (
    (precisions, 'Precision', 'blue'),
    (recalls, 'Recall', 'green'),
    (efforts, 'Effort', 'red'),
):
    plt.plot(thresholds, _values, label=_name, color=_colour)
plt.axvline(
    x=best_threshold,
    color='black',
    linestyle='--',
    label=f'Best Threshold: {best_threshold:.2f}',
)
plt.title('Precision, Recall, and Effort vs. Threshold')
plt.xlabel('Threshold')
plt.ylabel('Value')
plt.legend()
plt.show()
Editor is loading...
Leave a Comment