Untitled
unknown
plain_text
5 months ago
2.9 kB
5
Indexable
import numpy as np
import matplotlib.pyplot as plt
import shap

# Waterfall-style plot of per-feature SHAP contributions for one sample of an
# Isolation Forest model.
#
# Expected to already exist in the session (not created here):
#   iforest              - fitted model exposing ``decision_function``
#   X                    - dataset with a ``columns`` attribute
#                          (presumably a pandas DataFrame - confirm)
#   decision_shap_values - object whose ``values`` attribute holds one row of
#                          SHAP values per sample (presumably a
#                          ``shap.Explanation`` - confirm)

# Index of the sample to plot.
sample_index = 12

# SHAP values and anomaly score for the selected sample.
shap_values_single = decision_shap_values.values[sample_index]
anomaly_score = iforest.decision_function(X)[sample_index]

# Each feature's share of the total absolute SHAP contribution, in percent.
# Computed directly from the SHAP values: scaling by |anomaly_score| and then
# dividing by it again is a no-op that yields NaN whenever the score is 0.
abs_shap_values = np.abs(shap_values_single)
total_contribution = np.sum(abs_shap_values)
if total_contribution > 0:
    percent_contributions = abs_shap_values / total_contribution * 100
else:
    # All SHAP values are zero: no feature contributes anything.
    percent_contributions = np.zeros_like(abs_shap_values, dtype=float)

# Absolute contributions rescaled so they sum to |anomaly_score|.
# Kept for any later code that reads it; the plot itself does not use it.
scaled_shap_values = percent_contributions / 100 * np.abs(anomaly_score)

# Order features from largest to smallest absolute impact.
sorted_idx = np.argsort(abs_shap_values)[::-1]
sorted_shap_values = shap_values_single[sorted_idx]
sorted_features = np.array(X.columns)[sorted_idx]
sorted_percent_contributions = percent_contributions[sorted_idx]

# Bars are accumulated starting from 0.
# NOTE(review): the last bar therefore ends at the sum of the SHAP values.
# That presumably coincides with the anomaly-score line only if the
# explainer's base (expected) value is 0 - confirm against the explainer.
base_value = 0
previous_value = base_value

plt.figure(figsize=(10, 6))

for i, shap_value in enumerate(sorted_shap_values):
    bar_start = previous_value
    bar_end = bar_start + shap_value

    # Blue bars extend right (non-negative SHAP value), red bars extend left
    # (negative SHAP value); either way the bar spans bar_start..bar_end.
    plt.barh(
        y=i,
        width=np.abs(shap_value),
        left=min(bar_start, bar_end),
        color='blue' if shap_value >= 0 else 'red',
        edgecolor='black',
    )

    # Annotate at the right-hand edge of the bar with the percentage share
    # and the raw SHAP value.
    plt.text(
        max(bar_start, bar_end),
        i,
        f'{sorted_percent_contributions[i]:.1f}% ({shap_value:.4f})',
        va='center',
        fontsize=10,
        color='black',
    )

    # The next bar starts where this one ended.
    previous_value = bar_end

# Vertical reference line at the model's anomaly score for this sample.
plt.axvline(
    x=anomaly_score,
    color='black',
    linestyle='-',
    label=f'Anomaly score: {anomaly_score:.4f}',
)

plt.xlabel('Contribution to Anomaly Score')
plt.title(f'Waterfall Plot of Feature Contributions to Anomaly Score for Sample {sample_index}')
plt.legend()

# Label each bar with its feature name so the contributions are identifiable.
plt.yticks(range(len(sorted_features)), sorted_features)

plt.tight_layout()
plt.show()
Editor is loading...
Leave a Comment