Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
565 B
2
Indexable
def transform(self, df, input_col, output_col):
    """Attach mapped values as ``output_col`` and drop unmapped rows.

    The broadcast mapping is materialised as a two-column DataFrame
    (``input_col``, ``output_col``) and left-joined onto ``df`` using
    ``input_col`` as the join key. Rows whose ``output_col`` is null after
    the join are removed from the result.

    Args:
        df: DataFrame to join the mapping onto, keyed by ``input_col``.
        input_col: Name of the join column; also used as the name of the
            key column of the mapping DataFrame.
        output_col: Name given to the column holding the mapped values.

    Returns:
        The joined DataFrame restricted to rows with a non-null
        ``output_col``.

    Raises:
        ValueError: If ``self.mapping_broadcast`` is falsy, which the
            error message treats as "not fitted yet".
    """
    fitted_mapping = self.mapping_broadcast
    if not fitted_mapping:
        raise ValueError(
            "The indexer has not been fitted yet. Use fit first."
        )

    # Build a small lookup table from the broadcast value.
    # NOTE(review): assumes the broadcast value is a dict of
    # input value -> output value (only ``.items()`` is relied on here).
    spark = SparkSession.builder.getOrCreate()
    pairs = list(fitted_mapping.value.items())
    lookup_df = spark.createDataFrame(pairs, [input_col, output_col])

    # Left join keeps every row of ``df``; rows that found no match carry a
    # null ``output_col`` and are discarded by the filter below.
    joined_df = df.join(lookup_df, on=input_col, how='left')
    has_mapping = col(output_col).isNotNull()
    return joined_df.filter(has_mapping)