Untitled

mail@pastecode.io avatar
unknown
plain_text
7 months ago
1.3 kB
4
Indexable
Never
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.ticker import FuncFormatter
from pyspark.sql import functions as F

# Sum the "purchase" column separately for each gender code.
is_male = df['Gender'] == 'M'
is_female = df['Gender'] == 'F'
male_purchase_sum = df[is_male].agg(F.sum("purchase")).collect()[0][0]
female_purchase_sum = df[is_female].agg(F.sum("purchase")).collect()[0][0]

# Chart labels and their values, kept in the same order.
genders = ['Male', 'Female']
purchase_sums = [male_purchase_sum, female_purchase_sum]

# Pie chart: each gender's share of the combined purchase sum.
font2 = {'family': 'serif', 'color': 'green', 'size': 25}
plt.figure(figsize=(6, 6))
plt.pie(
    purchase_sums,
    labels=genders,
    autopct='%1.1f%%',  # label every slice with its share, one decimal place
    startangle=140,
)
plt.title('Purchase Sum by Gender', loc='right', fontdict=font2)
plt.show()

# Bar chart: one bar per gender, bar height is that gender's purchase sum.
plt.bar(x=genders, height=purchase_sums)

# Tick formatter that labels axis values without scientific notation
def format_large_tick_value(x, pos=None):
    """Format an axis tick value without scientific notation.

    Values whose magnitude is at least one million are shown in millions
    with one decimal place and an ``M`` suffix (``2500000`` -> ``'2.5M'``);
    smaller values are shown as whole numbers.

    Args:
        x: Tick value to format.
        pos: Tick position. Unused; accepted because matplotlib's
            ``FuncFormatter`` calls the function as ``func(x, pos)``.

    Returns:
        The tick label as a string.
    """
    # Compare on magnitude so large negative ticks are abbreviated as well.
    if abs(x) >= 1e6:
        return f'{x / 1e6:.1f}M'
    return f'{x:.0f}'

# Each bar's share of the combined purchase sum, as a percentage.
# `percentages` was used by the loop below without ever being defined,
# so it is derived here from the values that are plotted.
total_purchase = sum(purchase_sums)
percentages = [
    (amount / total_purchase * 100) if total_purchase else 0.0  # avoid 0/0
    for amount in purchase_sums
]

# Add percentage annotations on top of the bars
for i, (amount, percentage) in enumerate(zip(purchase_sums, percentages)):
    # x is the bar index; y sits just above the bar's top edge.
    plt.text(i, amount + 10, f"{percentage:.2f}%", ha="center")

font2 = {'family': 'serif', 'color': 'purple', 'size': 25}

# pad=40 lifts the title clear of the annotation on the tallest bar.
plt.title('Purchase Sum by Gender', loc='right', fontdict=font2, pad=40)
# Label y-axis ticks like "1.5M" instead of scientific notation.
plt.gca().yaxis.set_major_formatter(FuncFormatter(format_large_tick_value))

plt.show()