# Untitled
# unknown
# python
# 2 years ago
# 1.1 kB
# 9
# Indexable
import numpy as np
# Function to create percentile bins and count the number of samples in each bin
def calculate_percentile_bins(data, num_bins=10):
    """Split ``data`` into equal-percentile bins and count the samples in each.

    Args:
        data: Array-like of numeric samples (flattened by numpy if
            multi-dimensional).
        num_bins: Number of percentile bins; the default of 10 gives deciles.

    Returns:
        A ``(percentiles, bin_counts)`` tuple. ``percentiles`` holds the
        ``num_bins + 1`` bin edges, running from the 0th percentile (minimum)
        to the 100th percentile (maximum). ``bin_counts`` holds the number of
        samples falling between each pair of consecutive edges.

    Raises:
        ValueError: If ``num_bins`` is less than 1 or ``data`` is empty.
    """
    if num_bins < 1:
        raise ValueError("num_bins must be at least 1")
    values = np.asarray(data)
    if values.size == 0:
        raise ValueError("data must contain at least one sample")
    # linspace always yields exactly num_bins + 1 evenly spaced quantiles,
    # ending at 100 (the maximum). A float-step arange can gain or lose an
    # element through rounding, which would create a spurious sliver bin.
    quantiles = np.linspace(0, 100, num_bins + 1)
    percentiles = np.percentile(values, quantiles)
    # Count samples in each bin; np.histogram treats the last bin as closed,
    # so the maximum value is included.
    bin_counts = np.histogram(values, bins=percentiles)[0]
    return percentiles, bin_counts
# Setting up the plot for numerical attributes: one stacked subplot per attribute
fig, axes = plt.subplots(len(numerical_attributes), 1, figsize=(10, 5 * len(numerical_attributes)))
# With a single attribute plt.subplots returns a bare Axes instead of an array;
# normalise to a 1-D array so the indexing below works for any attribute count.
axes = np.atleast_1d(axes)
# Creating percentile distribution plots for each numerical attribute
for i, attr in enumerate(numerical_attributes):
    percentiles, bin_counts = calculate_percentile_bins(dataset[attr])
    # One bar per bin, labelled with the integer-truncated lower edge of the bin
    axes[i].bar(range(len(bin_counts)), bin_counts, tick_label=[f"{int(p)}" for p in percentiles[:-1]])
    axes[i].set_title(f'10% Percentile Distribution for {attr}')
    axes[i].set_xlabel('Percentile Bins')
    axes[i].set_ylabel('Sample Count')
plt.tight_layout()
plt.show()
# Editor is loading...
# Leave a Comment