# --- Paste-site page residue (not part of the script), kept as comments ---
# Untitled
#
# avatar
# unknown
# plain_text
# 5 months ago
# 2.9 kB
# 5
# Indexable
import numpy as np
import matplotlib.pyplot as plt
import shap

# Prepare the per-feature contributions of one sample for a waterfall plot.
#
# Names expected to exist already (created outside this snippet):
#   iforest              - the fitted Isolation Forest model
#   X                    - the dataset; must expose `.columns` (feature names)
#   decision_shap_values - SHAP explanation object; `.values` is assumed to
#                          hold one row of SHAP values per sample of X

# Specify the sample index you want to plot
sample_index = 12

# SHAP values for the selected sample (one value per feature)
shap_values_single = decision_shap_values.values[sample_index]

# Anomaly score of the selected sample, as returned by the model
anomaly_score = iforest.decision_function(X)[sample_index]

# Share of each feature in the total absolute SHAP mass of this sample.
abs_shap_values = np.abs(shap_values_single)
total_contribution = np.sum(abs_shap_values)

if total_contribution > 0:
    contribution_shares = abs_shap_values / total_contribution
else:
    # Every SHAP value is zero: report 0 % everywhere instead of dividing
    # by zero and filling the labels with NaN.
    contribution_shares = np.zeros_like(abs_shap_values, dtype=float)

# Contributions rescaled so that they sum to the absolute anomaly score.
scaled_shap_values = contribution_shares * np.abs(anomaly_score)

# Percentages are taken straight from the shares. Dividing the rescaled
# values by |anomaly_score| again would only undo the scaling above and
# produce NaN for a sample whose score is exactly 0.
percent_contributions = contribution_shares * 100

# Order everything by absolute impact, largest first
sorted_idx = np.argsort(abs_shap_values)[::-1]
sorted_shap_values = shap_values_single[sorted_idx]
sorted_features = np.array(X.columns)[sorted_idx]
sorted_percent_contributions = percent_contributions[sorted_idx]

# Draw one horizontal bar per feature, chained so that each bar starts where
# the previous one ended (waterfall layout).
#
# SHAP values are additive around the explainer's base value:
#     base value + sum(SHAP values of a sample) == explained output.
# Starting the chain at 0 would therefore end at sum(SHAP values), not at the
# anomaly score. Start at the base value instead.
# NOTE(review): assumes `decision_shap_values` explains
# `iforest.decision_function` - confirm against the explainer setup.
explainer_base_values = np.asarray(decision_shap_values.base_values, dtype=float)
if explainer_base_values.ndim > 0:
    # One base value per explained sample
    base_value = float(explainer_base_values[sample_index])
else:
    # A single base value shared by all samples
    base_value = float(explainer_base_values)
previous_value = base_value

# Create the waterfall plot
plt.figure(figsize=(10, 6))

# Mark where the chain of bars starts
plt.axvline(x=base_value, color='gray', linestyle='--', label=f'Base value: {base_value:.4f}')

for i, shap_value in enumerate(sorted_shap_values):
    bar_start = previous_value
    previous_value += shap_value  # Running total after this feature
    bar_right = max(bar_start, previous_value)

    # Positive SHAP values are drawn in blue and extend to the right;
    # negative ones are drawn in red and extend to the left.
    plt.barh(
        y=i,
        width=np.abs(shap_value),
        left=min(bar_start, previous_value),
        color='blue' if shap_value >= 0 else 'red',
        edgecolor='black',
    )

    # Percentage share and raw SHAP value, anchored at the bar's right edge
    plt.text(
        bar_right,
        i,
        f'{sorted_percent_contributions[i]:.1f}% ({shap_value:.4f})',
        va='center',
        fontsize=10,
        color='black',
    )

# Finish the figure: mark the anomaly score, label the axes and show it.

# Final anomaly score (end point)
plt.axvline(x=anomaly_score, color='black', linestyle='-', label=f'Anomaly score: {anomaly_score:.4f}')

# Add labels, title, and legend
plt.xlabel('Contribution to Anomaly Score')
plt.title(f'Waterfall Plot of Feature Contributions to Anomaly Score for Sample {sample_index}')
plt.legend()

# Label every bar with its feature name. Bar i was drawn at y=i in the same
# order as `sorted_features`; without these labels the bars cannot be
# attributed to a feature.
plt.yticks(np.arange(len(sorted_features)), sorted_features)

# Adjust layout
plt.tight_layout()

# Show plot
plt.show()
# --- Paste-site page residue (not part of the script), kept as comments ---
# Editor is loading...
# Leave a Comment