Untitled
user_3592770
plain_text
2 years ago
579 B
27
Indexable
# Aggregate employee salaries by gender with PySpark.
#
# Builds a small in-memory DataFrame of employee records, then prints the
# sum and the average of the "salary" column for each "gender" value.
from pyspark.sql import SparkSession
from pyspark.sql import functions as F  # namespaced: avoids shadowing builtin sum()
from pyspark.sql.types import IntegerType, StringType, StructField, StructType

# Reuse the session that a shell/notebook already provides, or start one when
# this runs as a standalone script.
spark = SparkSession.builder.getOrCreate()

# Column layout of the records in `data2`. Only "gender" and "salary" are
# referenced by the aggregation below; the remaining column names are
# descriptive labels chosen for the positional fields.
# NOTE(review): confirm the name/id column names against the intended schema.
schema = StructType(
    [
        StructField("firstname", StringType(), True),
        StructField("middlename", StringType(), True),
        StructField("lastname", StringType(), True),
        StructField("id", StringType(), True),
        StructField("gender", StringType(), True),
        StructField("salary", IntegerType(), True),
    ]
)

# Sample records: (first, middle, last, id, gender, salary).
# NOTE(review): the -1 salary on the last row looks like a "missing" sentinel;
# it is included as-is in the sum and average for gender "F" -- confirm intent.
data2 = [
    ("James", "", "Smith", "36636", "M", 3000),
    ("Michael", "Rose", "", "40288", "M", 4000),
    ("Robert", "", "Williams", "42114", "M", 4000),
    ("Maria", "Anne", "Jones", "39192", "F", 4000),
    ("Jen", "Mary", "Brown", "", "F", -1),
]

# Create the DataFrame from the data and schema.
df = spark.createDataFrame(data2, schema)

# Group by gender and calculate the sum and average of salary.
result_df = df.groupBy("gender").agg(F.sum("salary"), F.avg("salary"))

# Show the result.
result_df.show()
Editor is loading...