Untitled

 avatar
unknown
plain_text
2 years ago
987 B
5
Indexable
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Obtain the SparkSession for this script through the builder, registered
# under the application name "CreateDataFrame".
spark = (
    SparkSession.builder
    .appName("CreateDataFrame")
    .getOrCreate()
)

# Column layout of the DataFrame: five string columns followed by one
# integer column. Every column is declared nullable (third argument True).
schema = (
    StructType()
    .add("first_name", StringType(), True)
    .add("middle_name", StringType(), True)
    .add("last_name", StringType(), True)
    .add("ssn", StringType(), True)
    .add("gender", StringType(), True)
    .add("salary", IntegerType(), True)
)

# Sample rows, one tuple per person, in the same column order as `schema`.
# NOTE(review): the empty strings and the -1 salary look like placeholders
# for missing values -- confirm before relying on them downstream.
data = [
    ("James", "", "Smith", "36636", "M", 3000),
    ("Michael", "", "Rose", "40288", "M", 4000),
    ("Robert", "", "Williams", "42114", "M", 4000),
    ("Maria", "Anne", "Jones", "39192", "F", 4000),
    ("Jen", "Mary", "Brown", "", "F", -1),
]

# Build the DataFrame from the rows using the explicit schema above.
df = spark.createDataFrame(data, schema)

# Display the DataFrame contents.
df.show()
Editor is loading...