# Untitled
# Author: unknown
# Language: python
# Posted: a year ago
# Size: 1.1 kB
# Views: 4
# Indexable
import numpy as np


def calculate_percentile_bins(data, num_bins=10):
    """Split *data* into percentile-based bins and count the samples per bin.

    Args:
        data: Array-like of numeric values (list, ndarray, pandas Series, ...).
            NaN entries are ignored.
        num_bins: Number of percentile bins (default 10, i.e. deciles).

    Returns:
        A ``(percentiles, bin_counts)`` tuple: ``percentiles`` holds the
        ``num_bins + 1`` bin edges (the last edge is the data maximum) and
        ``bin_counts`` holds the number of samples in each of the
        ``num_bins`` bins.

    Raises:
        ValueError: If ``num_bins`` is smaller than 1 or ``data`` contains no
            non-NaN values.
    """
    if num_bins < 1:
        raise ValueError("num_bins must be at least 1")

    values = np.asarray(data, dtype=float).ravel()
    # NaNs would turn every edge into NaN and make np.histogram fail.
    values = values[~np.isnan(values)]
    if values.size == 0:
        raise ValueError("data must contain at least one non-NaN value")

    # linspace always yields exactly num_bins points; arange with a
    # fractional step (100 / num_bins) has a length that depends on rounding.
    lower_edges = np.percentile(
        values, np.linspace(0, 100, num_bins, endpoint=False)
    )
    # Close the last bin with the maximum so the largest sample is counted.
    percentiles = np.append(lower_edges, values.max())

    bin_counts = np.histogram(values, bins=percentiles)[0]
    return percentiles, bin_counts


def plot_percentile_distributions(dataset, numerical_attributes, num_bins=10):
    """Draw one bar chart of percentile-bin counts per numerical attribute.

    Args:
        dataset: Mapping-like object indexed by attribute name
            (``dataset[attr]`` must return array-like numeric data).
        numerical_attributes: Sequence of attribute names to plot.
        num_bins: Number of percentile bins per chart.

    NOTE(review): relies on ``plt`` being available at module level;
    presumably ``import matplotlib.pyplot as plt`` ran earlier in the
    file/session -- confirm.
    """
    num_attrs = len(numerical_attributes)
    if num_attrs == 0:
        # plt.subplots rejects a grid with zero rows; nothing to draw anyway.
        return

    # squeeze=False keeps `axes` two-dimensional even for a single attribute,
    # where plt.subplots would otherwise return a bare Axes object.
    _fig, axes = plt.subplots(
        num_attrs, 1, figsize=(10, 5 * num_attrs), squeeze=False
    )

    for ax, attr in zip(axes[:, 0], numerical_attributes):
        percentiles, bin_counts = calculate_percentile_bins(
            dataset[attr], num_bins
        )
        # Each bar is labelled with the lower edge of its bin.
        ax.bar(
            range(len(bin_counts)),
            bin_counts,
            tick_label=[f"{int(p)}" for p in percentiles[:-1]],
        )
        ax.set_title(f'{100 / num_bins:g}% Percentile Distribution for {attr}')
        ax.set_xlabel('Percentile Bins')
        ax.set_ylabel('Sample Count')

    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    # NOTE(review): `dataset` and `numerical_attributes` are not defined in
    # this snippet; presumably they come from earlier code -- confirm.
    plot_percentile_distributions(dataset, numerical_attributes)
# Editor is loading...
# Leave a Comment