Untitled
unknown
plain_text
2 years ago
565 B
11
Indexable
def transform(self, df, input_col, output_col):
    """Attach the fitted mapping to ``df`` as a new column.

    The dictionary held in ``self.mapping_broadcast`` is turned into a
    two-column DataFrame (``input_col``, ``output_col``) and left-joined onto
    ``df`` on ``input_col``. Rows whose ``output_col`` is null after the join
    are then dropped.

    Args:
        df: DataFrame to transform; it is joined on ``input_col``.
        input_col: Name of the column holding the values to look up.
        output_col: Name of the column that receives the mapped values.

    Returns:
        The joined DataFrame restricted to rows with a non-null
        ``output_col``.

    Raises:
        ValueError: If ``self.mapping_broadcast`` is unset or falsy, i.e. the
            indexer has not been fitted yet.
    """
    if not self.mapping_broadcast:
        raise ValueError(
            "The indexer has not been fitted yet. Use fit first."
        )

    # Convert the dictionary to a DataFrame: each (key, value) pair becomes
    # one row, with the key under input_col and the value under output_col.
    spark = SparkSession.builder.getOrCreate()
    mapping_df = spark.createDataFrame(
        list(self.mapping_broadcast.value.items()),
        [input_col, output_col],
    )

    # Left join, then drop the rows the join could not fill in. The explicit
    # null filter also removes rows whose key is mapped to a null value.
    joined_df = df.join(mapping_df, on=input_col, how='left')
    result_df = joined_df.filter(col(output_col).isNotNull())
    return result_df