Untitled

mail@pastecode.io avatar
unknown
python
a month ago
1.2 kB
2
Indexable
Never
# Split the value range of `data` into equal-width bins and count samples per bin
def calculate_percentile_bins_full_range(data, num_bins=10):
    """Bin *data* over its full range and count the samples in each bin.

    Despite the name, the edges are not quantiles: they are ``num_bins + 1``
    evenly spaced values running from the minimum to the maximum of *data*.

    Args:
        data: Numeric array-like accepted by ``np.min``, ``np.max`` and
            ``np.histogram``.
        num_bins: Number of bins to create (default 10).

    Returns:
        A tuple ``(edges, counts)``: ``edges`` holds the ``num_bins + 1`` bin
        boundaries and ``counts`` holds the number of samples in each of the
        ``num_bins`` bins.
    """
    lowest = np.min(data)
    highest = np.max(data)

    # Evenly spaced boundaries covering [lowest, highest]
    edges = np.linspace(lowest, highest, num_bins + 1)

    # np.histogram returns (counts, edges); only the counts are needed here
    counts, _ = np.histogram(data, bins=edges)
    return edges, counts

# Setting up the plot for numerical attributes with full range bins.
# squeeze=False makes plt.subplots always return a 2-D array of Axes; with the
# default squeezing, a single attribute yields a bare Axes object and indexing
# it fails. Flattening gives a 1-D array with one Axes per attribute.
fig, axes = plt.subplots(
    len(numerical_attributes),
    1,
    figsize=(10, 5 * len(numerical_attributes)),
    squeeze=False,
)
axes = axes.ravel()

# Creating one bar chart per numerical attribute (one bar per bin)
for ax, attr in zip(axes, numerical_attributes):
    percentiles, bin_counts = calculate_percentile_bins_full_range(dataset[attr])

    # Each bar is labelled with the lower edge of its bin, truncated to an int
    bin_labels = [f"{int(p)}" for p in percentiles[:-1]]
    ax.bar(range(len(bin_counts)), bin_counts, tick_label=bin_labels)

    ax.set_title(f'10% Percentile Distribution (Full Range) for {attr}')
    ax.set_xlabel('Percentile Bins (Min to Max)')
    ax.set_ylabel('Sample Count')

plt.tight_layout()
plt.show()
Leave a Comment