Untitled
unknown
plain_text
2 years ago
1.1 kB
18
Indexable
# Descriptive statistics
# `datetime_is_numeric` is only accepted by pandas 1.1-1.5; pandas 2.x removed
# the keyword (datetime columns are always summarised numerically there), so
# fall back to the plain call when the keyword is rejected.
try:
    desc_stats = df.describe(include='all', datetime_is_numeric=True)
except TypeError:
    desc_stats = df.describe(include='all')

# Grouping and Aggregation
# One row per (transaction_date, label) pair: litres are summed, the three
# dealer tonnage columns are averaged, and non-null client_inn_dil values are
# counted.
grouped_df = df.groupby(['transaction_date', 'label']).agg({
    'total_monthly_litres_dil': 'sum',
    'MO_dealer_monthly_ton_GZPN': 'mean',
    'KP_dealer_monthly_ton_GZPN': 'mean',
    'MO_KP_dealer_monthly_ton_GZPN': 'mean',
    'client_inn_dil': 'count',
}).reset_index()

# Plotting
fig, ax1 = plt.subplots(figsize=(12, 6))

# Plotting total monthly litres
colors = {'MO': 'blue', 'KP': 'green', 'MO_KP': 'red', 'Other': 'gray'}
# Labels missing from `colors` use the 'Other' colour instead of raising
# KeyError, so an unexpected cluster name does not abort the whole plot.
fallback_color = colors['Other']
grouped_df['color'] = grouped_df['label'].map(
    lambda name: colors.get(name, fallback_color)
)

# One scatter call per label so that each cluster gets its own legend entry.
for label, group in grouped_df.groupby('label'):
    ax1.scatter(
        group['transaction_date'],
        group['total_monthly_litres_dil'],
        label=label,
        color=colors.get(label, fallback_color),
        s=50,
    )

ax1.set_xlabel('Transaction Date')
ax1.set_ylabel('Total Monthly Litres', color='tab:blue')
ax1.tick_params(axis='y', labelcolor='tab:blue')
ax1.legend(title='Cluster')
plt.title('Total Monthly Litres by Cluster Over Time')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Final expression: in a notebook cell this displays the summary table and the
# first rows of the aggregated frame.
desc_stats, grouped_df.head()
Leave a Comment