Untitled
unknown
plain_text
20 days ago
1.3 kB
2
Indexable
Never
from pyspark.sql.functions import col, to_timestamp, to_date


def cast_columns_before_writing(df):
    """Cast a fixed set of columns to target types before writing to Delta.

    Parameters
    ----------
    df : pyspark.sql.DataFrame
        Input DataFrame expected to contain the columns listed in
        ``columns_to_cast`` below.

    Returns
    -------
    pyspark.sql.DataFrame
        A new DataFrame with the listed columns cast to their target types.
    """
    # Columns and the Spark SQL types they should be cast to.
    # FIX: the original paste was missing the commas between dict entries,
    # which is a SyntaxError in a dict literal.
    columns_to_cast = {
        "ServiceInfo1": "string",
        "AvailabilityZone": "string",
        "InvoiceSectionId": "string",
        "CostAllocationRuleName": "string",
        "Fab": "string",
        "CfiCluster": "string",
        "CfiCustom": "string",
        "RINormalizationRatio": "string",
    }

    for column, target_type in columns_to_cast.items():
        # NOTE: all current targets are "string", so the timestamp/date
        # branches are presently unused; kept so new entries can opt in.
        if target_type == "timestamp":
            # Assumes source dates arrive as MM/dd/yyyy — TODO confirm
            # against the upstream feed and adjust the format if needed.
            df = df.withColumn(column, to_timestamp(col(column), "MM/dd/yyyy"))
        elif target_type == "date":
            df = df.withColumn(column, to_date(col(column), "MM/dd/yyyy"))
        else:
            # Plain Spark SQL cast (e.g. double, int, string).
            df = df.withColumn(column, col(column).cast(target_type))
    return df


# Apply the casting function, then overwrite the Delta table with the result.
casted_df = cast_columns_before_writing(add_tagged_merged_df_spark)
casted_df.write.format("delta").mode("overwrite").saveAsTable('cfi_hpd.cfi.az_cur_silver')
Leave a Comment