Untitled
unknown
plain_text
a year ago
2.9 kB
7
Indexable
import numpy as np
import matplotlib.pyplot as plt
import shap
# Waterfall-style plot of the per-feature SHAP contributions for one sample
# scored by an Isolation Forest.
#
# The following names must already exist in the session (creating the SHAP
# explainer is not shown here):
#   iforest              - fitted model exposing decision_function()
#   X                    - dataset with a .columns attribute
#   decision_shap_values - SHAP result whose .values is indexed by sample

# Index of the sample to plot
sample_index = 12

# SHAP values for the selected sample (one entry per feature)
shap_values_single = decision_shap_values.values[sample_index]

# Anomaly score for the selected sample
anomaly_score = iforest.decision_function(X)[sample_index]

# Total absolute contribution of the SHAP values
abs_shap_values = np.abs(shap_values_single)
total_contribution = np.sum(abs_shap_values)

# Share of the total absolute contribution carried by each feature.
# Guarded so that an all-zero SHAP vector yields 0% everywhere instead of NaN.
if total_contribution > 0:
    contribution_share = abs_shap_values / total_contribution
else:
    contribution_share = np.zeros_like(abs_shap_values, dtype=float)

# Shares rescaled so that they sum to the absolute anomaly score
scaled_shap_values = contribution_share * np.abs(anomaly_score)

# Percentages come straight from the shares. Dividing the rescaled values by
# |anomaly_score| again would give the same numbers but breaks when the score
# is exactly 0.
percent_contributions = contribution_share * 100

# Sort features by absolute SHAP impact, largest first
sorted_idx = np.argsort(abs_shap_values)[::-1]
sorted_shap_values = shap_values_single[sorted_idx]
sorted_features = np.array(X.columns)[sorted_idx]
sorted_percent_contributions = percent_contributions[sorted_idx]

# The running total starts at 0.
# NOTE(review): with this start the bars end at sum(SHAP values). If the
# explainer has a non-zero base value, that end point will not coincide with
# the anomaly-score line drawn below - confirm whether the explainer's base
# value should be used as the start instead.
base_value = 0
previous_value = base_value

# Create the waterfall plot
plt.figure(figsize=(10, 6))
for i, shap_value in enumerate(sorted_shap_values):
    bar_start = previous_value
    previous_value += shap_value  # Running total after this feature

    # Each bar spans the running total before and after the feature:
    # positive values (blue) extend to the right, negative (red) to the left.
    left_edge = min(bar_start, previous_value)
    right_edge = max(bar_start, previous_value)
    plt.barh(
        y=i,
        width=np.abs(shap_value),
        left=left_edge,
        color='blue' if shap_value >= 0 else 'red',
        edgecolor='black'
    )

    # Percentage and raw SHAP value, anchored at the bar's right edge
    plt.text(
        right_edge,
        i,
        f'{sorted_percent_contributions[i]:.1f}% ({shap_value:.4f})',
        va='center',
        fontsize=10,
        color='black'
    )

# Mark the sample's anomaly score
plt.axvline(x=anomaly_score, color='black', linestyle='-', label=f'Anomaly score: {anomaly_score:.4f}')

# Add labels, title, and legend
plt.xlabel('Contribution to Anomaly Score')
plt.title(f'Waterfall Plot of Feature Contributions to Anomaly Score for Sample {sample_index}')
plt.legend()

# Label each bar with its feature name so the contributions can be identified
plt.yticks(np.arange(len(sorted_features)), sorted_features)

# Adjust layout
plt.tight_layout()

# Show plot
plt.show()
Editor is loading...
Leave a Comment