Untitled
unknown
plain_text
2 years ago
987 B
5
Indexable
# Build a small in-memory PySpark DataFrame from literal rows with an
# explicit schema, then print it to stdout.
from pyspark.sql import SparkSession
from pyspark.sql.types import IntegerType, StringType, StructField, StructType

# Create a SparkSession object (getOrCreate hands back an existing session
# if one is already active instead of starting a second one).
spark = SparkSession.builder.appName("CreateDataFrame").getOrCreate()

# Define the schema for the DataFrame. Every column is declared nullable
# (third argument True); ssn is kept as a string, salary as an integer.
schema = StructType([
    StructField("first_name", StringType(), True),
    StructField("middle_name", StringType(), True),
    StructField("last_name", StringType(), True),
    StructField("ssn", StringType(), True),
    StructField("gender", StringType(), True),
    StructField("salary", IntegerType(), True),
])

# Rows are positional tuples whose order must match the schema fields above.
# NOTE(review): the last row uses "" for ssn and -1 for salary, presumably as
# placeholders for missing values -- confirm whether None was intended.
data = [
    ("James", "", "Smith", "36636", "M", 3000),
    ("Michael", "", "Rose", "40288", "M", 4000),
    ("Robert", "", "Williams", "42114", "M", 4000),
    ("Maria", "Anne", "Jones", "39192", "F", 4000),
    ("Jen", "Mary", "Brown", "", "F", -1),
]

# Create a DataFrame with the given data and schema.
df = spark.createDataFrame(data, schema=schema)

# Show the DataFrame.
df.show()
Editor is loading...