# Untitled
# unknown
# python
# 2 years ago
# 1.2 kB
# 9
# Indexable
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the preprocessed data points and report the frame's dimensions.
df = pd.read_csv('../data/preprocessed_data_points.csv')
print(df.shape)
# Bare expression: has no effect in a plain script (presumably kept for
# notebook display of the first two rows).
df.head(2)

# Short aliases for the per-100g columns that make up a profile.
en, su, fa, sa, pr, fi, fr = (
    "energy-kj_100g",
    "sugars_100g",
    "saturated-fat_100g",
    "sodium_100g",
    "proteins_100g",
    "fiber_100g",
    "fruits-vegetables-nuts-estimate-from-ingredients_100g",
)

# Keep only the seven profile columns, in the order listed above.
profiles = df[[en, su, fa, sa, pr, fi, fr]]
print(profiles.shape)
profiles.head(2)
# Summary statistics of every profile column, including the 0 %, 20 %, 40 %,
# 60 %, 80 % and 100 % quantiles requested through `percentiles`.
quantiles = profiles.describe(percentiles=[0, 0.20, 0.40, 0.60, 0.80, 1])

# Row labels under which describe() reports the requested quantiles.
quantile_labels = ['0%', '20%', '40%', '60%', '80%', '100%']

# Maps each column name to the list of its six quantile values.
profile_dict = {}

# One figure per column: density histogram with a KDE curve, overlaid with a
# dashed red vertical line at every quantile value.
for col in profiles.columns:
    plt.figure(figsize=(8, 5))
    sns.histplot(profiles[col], kde=True, bins=30, color='green', stat='density')
    plt.title(f'Distribution of {col}')
    plt.xlabel('Values')
    plt.ylabel('Density')

    # Quantile values of this column, in ascending percentile order.
    each_criteria = quantiles.loc[quantile_labels, col].tolist()
    for quantile in each_criteria:
        plt.axvline(x=quantile, color='red', linestyle='--', linewidth=1)

    profile_dict[col] = each_criteria
    plt.show()
# Leave a Comment