Untitled

mail@pastecode.io avatar
unknown
plain_text
11 days ago
1.1 kB
1
Indexable
Never
# Descriptive statistics for every column (include='all').
# The `datetime_is_numeric` keyword only exists in pandas 1.1-1.x; pandas 2.0
# removed it (datetime columns are always summarised numerically there) and
# passing it raises TypeError, so fall back to the call without the keyword.
try:
    desc_stats = df.describe(include='all', datetime_is_numeric=True)
except TypeError:
    desc_stats = df.describe(include='all')

# Aggregate per (transaction_date, label) pair: litres are summed, the three
# dealer tonnage columns are averaged, and client_inn_dil values are counted.
# reset_index() turns the two group keys back into ordinary columns.
grouped_df = (
    df.groupby(['transaction_date', 'label'])
    .agg(
        {
            'total_monthly_litres_dil': 'sum',
            **dict.fromkeys(
                [
                    'MO_dealer_monthly_ton_GZPN',
                    'KP_dealer_monthly_ton_GZPN',
                    'MO_KP_dealer_monthly_ton_GZPN',
                ],
                'mean',
            ),
            'client_inn_dil': 'count',
        }
    )
    .reset_index()
)

# Plotting: one scatter series per cluster label, all on a single axes.
fig, ax1 = plt.subplots(figsize=(12, 6))

# Fixed colour per known cluster label. A label that is missing from this
# mapping falls back to the 'Other' colour instead of raising KeyError.
colors = {'MO': 'blue', 'KP': 'green', 'MO_KP': 'red', 'Other': 'gray'}
fallback_color = colors['Other']

# Per-row colour column stored on grouped_df. The scatter loop below looks the
# colour up by label directly and does not read this column.
grouped_df['color'] = grouped_df['label'].apply(
    lambda x: colors.get(x, fallback_color)
)

# Total monthly litres over time, one legend entry per label.
for label, group in grouped_df.groupby('label'):
    ax1.scatter(
        group['transaction_date'],
        group['total_monthly_litres_dil'],
        label=label,
        color=colors.get(label, fallback_color),
        s=50,
    )

ax1.set_xlabel('Transaction Date')
ax1.set_ylabel('Total Monthly Litres', color='tab:blue')
ax1.tick_params(axis='y', labelcolor='tab:blue')
ax1.legend(title='Cluster')

plt.title('Total Monthly Litres by Cluster Over Time')
plt.xticks(rotation=45)  # rotate the x tick labels by 45 degrees
plt.tight_layout()
plt.show()

# Bare expression: builds a tuple of desc_stats and the first rows of grouped_df.
# NOTE(review): presumably meant as the last expression of a notebook cell so
# the tuple is displayed; in a plain script this line shows nothing - confirm.
desc_stats, grouped_df.head()
Leave a Comment