Untitled
user_3592770
plain_text
2 years ago
1.0 kB
5
Indexable
Never
# Builds a five-row team DataFrame with an explicit schema, registers it as the
# temp view "team", displays it, and produces a copy repartitioned by "Age".
#
# NOTE(review): `spark` and `display` are not defined in this file; presumably
# they are supplied by the notebook runtime (e.g. Databricks) -- confirm.
from pyspark.sql.functions import lit
from pyspark.sql.types import IntegerType, StringType, StructField, StructType

# This step was originally the notebook magic `%sql CREATE DATABASE Day3`,
# which is not valid Python when it appears in the middle of a script.
# IF NOT EXISTS keeps a second run from failing on an existing database.
spark.sql("CREATE DATABASE IF NOT EXISTS Day3")

# Schema for the DataFrame: every column is declared nullable.
schema = StructType(
    [
        StructField("First_Name", StringType(), True),
        StructField("Last_Name", StringType(), True),
        StructField("Age", IntegerType(), True),
        StructField("Residential_Area", StringType(), True),
    ]
)

# Source rows, in the same column order as `schema`.
team_data = [
    ("George", "Mouxios", 27, "40 Ekklisies"),
    ("Konstantinos", "Ntellas", 29, "Toumpa"),
    ("Persa", "Antoniou", 27, "Euosmos"),
    ("Vasileia", "Tranterou", 25, "Ano Poli"),
    ("Ilona", "Baimpouridou", 28, "Kalamaria"),
]

# Create the initial PySpark DataFrame.
teamDF = spark.createDataFrame(team_data, schema)

# Register the temp view only after teamDF exists; the original called this
# before the assignment, which raises NameError on a top-to-bottom run.
teamDF.createOrReplaceTempView("team")

# Show the resulting DataFrame.
display(teamDF)

# New DataFrame whose rows are redistributed by the "Age" column.
partitionedDF = teamDF.repartition("Age")