Untitled

mail@pastecode.io avatar
unknown
plain_text
20 days ago
1.3 kB
2
Indexable
Never
from pyspark.sql.functions import col, to_timestamp, to_date

def cast_columns_before_writing(df):
    """Cast a fixed set of columns to their target Spark SQL types.

    Intended to normalize the schema of the merged cost/usage DataFrame
    before it is written to the silver Delta table, so repeated overwrite
    writes don't fail on schema drift.

    Args:
        df: Input pyspark.sql.DataFrame. It is assumed to contain every
            column listed in ``columns_to_cast`` — TODO confirm; a missing
            column will raise an AnalysisException from ``withColumn``.

    Returns:
        A new DataFrame with the listed columns cast to their target types.
    """
    # Target types per column. NOTE(review): the original dict literal was
    # missing the commas between entries (a SyntaxError); fixed here.
    # Currently every target is "string", but the loop below already
    # supports "timestamp" and "date" targets should entries be added.
    columns_to_cast = {
        "ServiceInfo1": "string",
        "AvailabilityZone": "string",
        "InvoiceSectionId": "string",
        "CostAllocationRuleName": "string",
        "Fab": "string",
        "CfiCluster": "string",
        "CfiCustom": "string",
        "RINormalizationRatio": "string",
    }

    for column, target_type in columns_to_cast.items():
        if target_type == "timestamp":
            # Source date strings assumed to be MM/dd/yyyy — adjust if the
            # upstream export format changes.
            df = df.withColumn(column, to_timestamp(col(column), "MM/dd/yyyy"))
        elif target_type == "date":
            df = df.withColumn(column, to_date(col(column), "MM/dd/yyyy"))
        else:
            # Plain Spark SQL cast (e.g. string, double, int).
            df = df.withColumn(column, col(column).cast(target_type))

    return df

# Normalize the column types on the tagged/merged DataFrame, then persist
# the result as the silver Delta table (full overwrite each run).
casted_df = cast_columns_before_writing(add_tagged_merged_df_spark)

(
    casted_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable('cfi_hpd.cfi.az_cur_silver')
)
Leave a Comment