Untitled
unknown
plain_text
a year ago
1.3 kB
12
Indexable
from pyspark.sql.functions import col, to_timestamp, to_date
def cast_columns_before_writing(df):
    """Cast a fixed set of columns to target Spark SQL types before writing.

    Fixes the original dict literal, which was missing commas between
    entries (a SyntaxError as written).

    Args:
        df: Input Spark DataFrame containing the columns listed below.
            Columns absent from the DataFrame will raise an
            AnalysisException when referenced — TODO confirm all are
            always present upstream.

    Returns:
        A new DataFrame with the listed columns cast to their target types.
    """
    # Target type per column. Currently everything is "string", but the
    # loop below also supports "timestamp"/"date" entries so new columns
    # can be added here without touching the logic.
    columns_to_cast = {
        "ServiceInfo1": "string",
        "AvailabilityZone": "string",
        "InvoiceSectionId": "string",
        "CostAllocationRuleName": "string",
        "Fab": "string",
        "CfiCluster": "string",
        "CfiCustom": "string",
        "RINormalizationRatio": "string",
    }
    for column, target_type in columns_to_cast.items():
        if target_type == "timestamp":
            # Assumes source strings are MM/dd/yyyy — adjust the format
            # string if the upstream feed changes.
            df = df.withColumn(column, to_timestamp(col(column), "MM/dd/yyyy"))
        elif target_type == "date":
            df = df.withColumn(column, to_date(col(column), "MM/dd/yyyy"))
        else:
            # Plain cast for simple types (e.g. double, int, string).
            df = df.withColumn(column, col(column).cast(target_type))
    return df
# Normalize column types, then persist the result as a Delta table.
casted_df = cast_columns_before_writing(add_tagged_merged_df_spark)
(
    casted_df.write
    .format("delta")
    .mode("overwrite")  # full refresh of the silver table on each run
    .saveAsTable('cfi_hpd.cfi.az_cur_silver')
)
Editor is loading...
Leave a Comment