Untitled

 avatar
user_3592770
plain_text
2 years ago
907 B
9
Indexable
from pyspark.sql.types import StructType, StructField , StringType, IntegerType
from pyspark.sql.functions import lit 
# Sample employee rows. Each tuple is positional:
# (firstname, middlename, lastname, id, gender, salary).
# An empty string marks a name part that is not present.
data2 = [
    ("James", "", "Smith", "12345", "M", 3000),
    ("Michael", "Rose", "", "23456", "F", 4000),
    ("Robert", "", "Williams", "32456", "M", 4000),
    ("James", "Joe", "Callab", "11111", "F", 2000),
    ("Gill", "", "Anthony", "22222", "M", 1000),
]

# Explicit column layout for the sample rows: five string columns followed
# by an integer salary. Every column is declared nullable.
schema = StructType(
    [
        StructField("firstname", StringType(), nullable=True),
        StructField("middlename", StringType(), nullable=True),
        StructField("lastname", StringType(), nullable=True),
        StructField("id", StringType(), nullable=True),
        StructField("gender", StringType(), nullable=True),
        StructField("salary", IntegerType(), nullable=True),
    ]
)


# Build the DataFrame from the sample rows using the explicit schema.
# NOTE(review): `spark` is not defined in this file -- presumably the
# SparkSession provided by the notebook / pyspark shell; confirm.
df = spark.createDataFrame(data=data2, schema=schema)

# withColumn() returns a NEW DataFrame and leaves the receiver unchanged,
# so the result must be rebound; otherwise the "Country" column is dropped
# and never appears in the schema or output below.
df = df.withColumn("Country", lit("USA"))

# Print the column layout, then the rows without truncating long values.
df.printSchema()
df.show(truncate=False)
Editor is loading...