Untitled

 avatar
user_3592770
plain_text
2 years ago
1.0 kB
5
Indexable
Never
from pyspark.sql.types import StructType, StructField , StringType, IntegerType
from pyspark.sql.functions import lit 

from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# NOTE(review): `spark` and `display` are not defined anywhere in this file;
# presumably they are injected by the notebook runtime (e.g. Databricks) --
# confirm before running this as a standalone script.

# Create the working database. This goes through spark.sql() rather than the
# `%sql` cell magic, because the magic is only understood as the first line
# of a notebook cell and is a syntax error in ordinary Python.
# IF NOT EXISTS keeps a re-run from failing once the database is present.
spark.sql("CREATE DATABASE IF NOT EXISTS Day3")

# Define the schema for the DataFrame (every column is declared nullable).
schema = StructType([
    StructField("First_Name", StringType(), True),
    StructField("Last_Name", StringType(), True),
    StructField("Age", IntegerType(), True),
    StructField("Residential_Area", StringType(), True),
])

# Rows for the initial DataFrame, in the same column order as `schema`.
team_data = [
    ("George", "Mouxios", 27, "40 Ekklisies"),
    ("Konstantinos", "Ntellas", 29, "Toumpa"),
    ("Persa", "Antoniou", 27, "Euosmos"),
    ("Vasileia", "Tranterou", 25, "Ano Poli"),
    ("Ilona", "Baimpouridou", 28, "Kalamaria"),
]

# Create the initial PySpark DataFrame.
teamDF = spark.createDataFrame(team_data, schema)

# Register the DataFrame as the temp view "team". This has to happen after
# teamDF is assigned; calling it earlier raises NameError when the script is
# executed from top to bottom.
teamDF.createOrReplaceTempView("team")

# Show the resulting DataFrame.
display(teamDF)

# Redistribute the rows across partitions keyed on the "Age" column.
partitionedDF = teamDF.repartition("Age")