Untitled
unknown
plain_text
2 years ago
1.3 kB
6
Indexable
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.ticker import FuncFormatter
from pyspark.sql import functions as F

# Plot the total of the "purchase" column per gender, first as a pie chart
# and then as a bar chart annotated with each gender's share of the total.
#
# NOTE(review): `df` is not defined in this snippet. The calls below
# (`.agg(F.sum(...)).collect()`) presume it is a PySpark DataFrame with
# 'Gender' and 'purchase' columns created earlier -- confirm against the caller.


def format_large_tick_value(x, pos):
    """Format an axis tick without scientific notation.

    Args:
        x: Tick value.
        pos: Tick position; unused, but part of the callback signature
            that ``FuncFormatter`` invokes.

    Returns:
        ``x`` in millions with one decimal and an ``M`` suffix when
        ``x >= 1e6``, otherwise ``x`` rounded to a whole number.
    """
    if x >= 1e6:
        # Convert values >= 1 million to millions with one decimal place
        return f'{x / 1e6:.1f}M'
    return f'{x:.0f}'


# The aggregate comes back as None when no rows match the filter; fall back
# to 0 so the plotting calls and the arithmetic below still work.
male_purchase_sum = (
    df[df['Gender'] == 'M'].agg(F.sum("purchase")).collect()[0][0] or 0
)
female_purchase_sum = (
    df[df['Gender'] == 'F'].agg(F.sum("purchase")).collect()[0][0] or 0
)

genders = ['Male', 'Female']
purchase_sums = [male_purchase_sum, female_purchase_sum]

# Share of the overall purchase total per gender, used for the bar labels.
# Guard against a zero total to avoid a division by zero.
total_purchase_sum = sum(purchase_sums)
percentages = [
    (value / total_purchase_sum * 100) if total_purchase_sum else 0.0
    for value in purchase_sums
]

# for pie chart
plt.figure(figsize=(6, 6))
plt.pie(purchase_sums, labels=genders, autopct='%1.1f%%', startangle=140)
font2 = {'family': 'serif', 'color': 'green', 'size': 25}
plt.title('Purchase Sum by Gender', loc='right', fontdict=font2)
plt.show()

# for bar chart
plt.bar(genders, purchase_sums)

# Add percentage annotations on top of the bars
for i, percentage in enumerate(percentages):
    plt.text(i, purchase_sums[i] + 10, f"{percentage:.2f}%", ha="center")

font2 = {'family': 'serif', 'color': 'purple', 'size': 25}
plt.title('Purchase Sum by Gender', loc='right', fontdict=font2, pad=40)
# Show plain / "M"-suffixed tick labels instead of scientific notation.
plt.gca().yaxis.set_major_formatter(FuncFormatter(format_large_tick_value))
plt.show()
Editor is loading...