Untitled
unknown
python
2 years ago
1.2 kB
7
Indexable
# Function to create equal-width bins from min to max and count the number of samples in each bin
def calculate_percentile_bins_full_range(data, num_bins=10):
    """Split the value range of *data* into equal-width bins and count samples.

    Despite the name, the bin edges are not quantiles: they are ``num_bins + 1``
    evenly spaced values between the smallest and largest finite value in
    *data*, so each bin covers the same share of the value range rather than
    the same share of the samples.

    Args:
        data: Array-like of numeric values (list, NumPy array, pandas Series,
            ...). It is flattened; NaN and +/-inf entries are ignored.
        num_bins: Number of bins to create between the minimum and maximum.

    Returns:
        A ``(percentiles, bin_counts)`` tuple, where ``percentiles`` holds the
        ``num_bins + 1`` bin edges and ``bin_counts`` holds the number of
        samples that fall in each of the ``num_bins`` bins.

    Raises:
        ValueError: If *data* contains no finite values.
    """
    values = np.asarray(data, dtype=float).ravel()
    # Drop NaN/inf up front: a single NaN would otherwise turn min/max (and
    # therefore every bin edge) into NaN and make the counts meaningless.
    values = values[np.isfinite(values)]
    if values.size == 0:
        raise ValueError("data must contain at least one finite value")

    # Calculate min and max
    min_val, max_val = values.min(), values.max()
    # Generate evenly spaced bin edges within the full range from min to max
    percentiles = np.linspace(min_val, max_val, num_bins + 1)
    # Count samples in each bin
    bin_counts = np.histogram(values, bins=percentiles)[0]
    return percentiles, bin_counts
# Setting up the plot for numerical attributes with full range bins.
# squeeze=False makes plt.subplots always return a 2-D array of Axes; by
# default a single subplot comes back as a bare Axes object, which would make
# the axes[i] indexing below fail when there is only one numerical attribute.
fig, axes_grid = plt.subplots(
    len(numerical_attributes),
    1,
    figsize=(10, 5 * len(numerical_attributes)),
    squeeze=False,
)
# Only one column was requested, so keep it as a 1-D array of Axes (one per row)
axes = axes_grid[:, 0]

# Creating bin-count bar charts for each numerical attribute
for i, attr in enumerate(numerical_attributes):
    percentiles, bin_counts = calculate_percentile_bins_full_range(dataset[attr])
    # One bar per bin, labelled with the integer-truncated lower edge of that bin
    axes[i].bar(range(len(bin_counts)), bin_counts, tick_label=[f"{int(p)}" for p in percentiles[:-1]])
    axes[i].set_title(f'10% Percentile Distribution (Full Range) for {attr}')
    axes[i].set_xlabel('Percentile Bins (Min to Max)')
    axes[i].set_ylabel('Sample Count')

plt.tight_layout()
plt.show()
Editor is loading...
Leave a Comment