Untitled
unknown
python
a year ago
1.2 kB
5
Indexable
# Plot the distribution of selected per-100g columns and record, for each
# column, the values at the 0/20/40/60/80/100 % quantiles in `profile_dict`.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Single source of truth for the quantile levels. They are used both for the
# summary table and for the per-column cut-offs, so the two cannot drift apart.
PERCENTILES = [0, 0.20, 0.40, 0.60, 0.80, 1]

df = pd.read_csv('../data/preprocessed_data_points.csv')
print(df.shape)
df.head(2)  # bare expression: only renders when run as a notebook cell

# Short aliases for the column names selected below.
en = "energy-kj_100g"
su = "sugars_100g"
fa = "saturated-fat_100g"
sa = "sodium_100g"
pr = "proteins_100g"
fi = "fiber_100g"
fr = "fruits-vegetables-nuts-estimate-from-ingredients_100g"

profiles = df.loc[:, [en, su, fa, sa, pr, fi, fr]]
print(profiles.shape)
profiles.head(2)  # bare expression: only renders when run as a notebook cell

# Summary table (count/mean/std/min/percentiles/max) for every selected column.
quantiles = profiles.describe(percentiles=PERCENTILES)

# Maps column name -> list of quantile values, in the order of PERCENTILES.
profile_dict = {}

# One figure per column: histogram + KDE with a dashed line at every quantile.
for col in profiles.columns:
    plt.figure(figsize=(8, 5))
    sns.histplot(profiles[col], kde=True, bins=30, color='green', stat='density')
    plt.title(f'Distribution of {col}')
    plt.xlabel('Values')
    plt.ylabel('Density')

    # Compute the cut-offs directly from the data rather than looking them up
    # in the describe() table by hand-formatted labels such as '20%', which
    # depends on how pandas formats its index labels.
    each_criteria = profiles[col].quantile(PERCENTILES).tolist()
    for cutoff in each_criteria:
        plt.axvline(x=cutoff, color='red', linestyle='--', linewidth=1)

    profile_dict[col] = each_criteria

    # NOTE(review): the original indentation was lost in the paste; showing
    # each figure as soon as it is complete is assumed here - confirm.
    plt.show()
Editor is loading...
Leave a Comment