Untitled
unknown
plain_text
2 years ago
1.3 kB
9
Indexable
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.ticker import FuncFormatter
from pyspark.sql import functions as F
# Total of the "purchase" column for each gender. An aggregate without a
# group-by yields exactly one row, whose single column is the sum.
male_rows = df.filter(df['Gender'] == 'M')
female_rows = df.filter(df['Gender'] == 'F')
male_purchase_sum = male_rows.agg(F.sum("purchase")).collect()[0][0]
female_purchase_sum = female_rows.agg(F.sum("purchase")).collect()[0][0]

# Labels and values are kept in matching order for the charts below.
genders = ['Male', 'Female']
purchase_sums = [male_purchase_sum, female_purchase_sum]
# Pie chart: each gender's share of the combined purchase amount.
font2 = dict(family='serif', color='green', size=25)
plt.figure(figsize=(6, 6))
plt.pie(
    purchase_sums,
    labels=genders,
    startangle=140,
    autopct='%1.1f%%',  # print each wedge's percentage with one decimal
)
plt.title('Purchase Sum by Gender', fontdict=font2, loc='right')
plt.show()
# Bar chart: one bar per gender, bar height = total purchase amount.
plt.bar(x=genders, height=purchase_sums)
def format_large_tick_value(x: float, pos) -> str:
    """Format an axis tick value without scientific notation.

    Values of one million or more are shown in millions with one decimal
    place and an ``M`` suffix (e.g. ``2500000`` -> ``'2.5M'``); smaller
    values are shown rounded to a whole number.

    Args:
        x: The tick value to format.
        pos: The tick position. Unused, but required by the
            ``(value, position)`` signature that matplotlib's
            ``FuncFormatter`` calls this function with.

    Returns:
        The tick label as a string.
    """
    if x >= 1e6:
        # Convert values >= 1 million to millions with one decimal place
        return f'{x / 1e6:.1f}M'
    return f'{x:.0f}'
# Each gender's share of the combined purchase total, as a percentage.
# A zero total is mapped to 0% so an empty dataset does not raise
# ZeroDivisionError.
total_purchase = sum(purchase_sums)
percentages = [
    (amount / total_purchase * 100) if total_purchase else 0.0
    for amount in purchase_sums
]

# Add percentage annotations on top of the bars. Bars for categorical labels
# sit at x positions 0, 1, ..., so the enumerate index is the bar's x position.
for i, (amount, percentage) in enumerate(zip(purchase_sums, percentages)):
    plt.text(i, amount + 10, f"{percentage:.2f}%", ha="center")

font2 = {'family': 'serif', 'color': 'purple', 'size': 25}
plt.title('Purchase Sum by Gender', loc='right', fontdict=font2, pad=40)
# Label y-axis ticks as plain numbers / millions instead of scientific notation.
plt.gca().yaxis.set_major_formatter(FuncFormatter(format_large_tick_value))
plt.show()