Untitled

 avatar
unknown
python
a year ago
1.1 kB
4
Indexable
import numpy as np

# Function to create percentile bins and count the number of samples in each bin
def calculate_percentile_bins(data, num_bins=10):
    """Split ``data`` into percentile-based bins and count the samples in each.

    Parameters
    ----------
    data : array-like
        Numeric samples; passed straight to ``np.percentile``, ``np.max``
        and ``np.histogram``.
    num_bins : int, optional
        Number of percentile bins. The default of 10 yields decile bins.

    Returns
    -------
    percentiles : numpy.ndarray
        The ``num_bins + 1`` bin edges: the percentiles at
        ``0, 100/num_bins, 2*100/num_bins, ...`` followed by the maximum
        of ``data``.
    bin_counts : numpy.ndarray
        Number of samples that fall into each of the ``num_bins`` bins.

    Raises
    ------
    ValueError
        If ``num_bins`` is smaller than 1.
    """
    if num_bins < 1:
        raise ValueError("num_bins must be at least 1")

    # np.arange with a fractional step (100 / num_bins) can emit one element
    # too many because of floating-point rounding; linspace always returns
    # exactly ``num_bins`` evenly spaced cut points.
    cut_points = np.linspace(0, 100, num_bins, endpoint=False)
    percentiles = np.percentile(data, cut_points)
    # Close the last bin at the maximum so every sample is counted.
    percentiles = np.append(percentiles, np.max(data))

    # Count samples in each bin
    bin_counts = np.histogram(data, bins=percentiles)[0]
    return percentiles, bin_counts

# Setting up the plot for numerical attributes
# squeeze=False makes plt.subplots always return a 2-D array of Axes; without
# it a single attribute yields a bare Axes object and `axes[i]` below fails.
fig, axes = plt.subplots(
    len(numerical_attributes),
    1,
    figsize=(10, 5 * len(numerical_attributes)),
    squeeze=False,
)
# Only one column was requested, so keep it as a 1-D array indexable by row.
axes = axes[:, 0]

# Creating percentile distribution plots for each numerical attribute
for i, attr in enumerate(numerical_attributes):
    percentiles, bin_counts = calculate_percentile_bins(dataset[attr])
    # One bar per bin, labelled with the (integer-truncated) lower edge of the bin.
    axes[i].bar(range(len(bin_counts)), bin_counts, tick_label=[f"{int(p)}" for p in percentiles[:-1]])
    axes[i].set_title(f'10% Percentile Distribution for {attr}')
    axes[i].set_xlabel('Percentile Bins')
    axes[i].set_ylabel('Sample Count')

plt.tight_layout()
plt.show()
Editor is loading...
Leave a Comment