from pyspark.ml import Pipeline
from pyspark.ml.classification import RandomForestClassifier, GBTClassifier
from pyspark.ml.evaluation import BinaryClassificationEvaluator, MulticlassClassificationEvaluator
# Define the Random Forest model
rf = RandomForestClassifier(numTrees=50,
                            maxDepth=10,
                            labelCol='churn',
                            featuresCol='features_m',
                            predictionCol='rf_pred',
                            probabilityCol='rf_prob',
                            rawPredictionCol='rf_raw_pred',
                            minInstancesPerNode=100,
                            maxBins=32,
                            featureSubsetStrategy='auto',
                            subsamplingRate=1.0,
                            impurity='gini',
                            seed=2023)
# Define the GBT model (its probability and raw-prediction columns keep the
# defaults 'probability' and 'rawPrediction', so they do not collide with the
# 'rf_'-prefixed Random Forest columns)
gbt = GBTClassifier(featuresCol='features_m',
                    labelCol='churn',
                    predictionCol='gbt_pred',
                    maxDepth=5,
                    maxBins=32,
                    minInstancesPerNode=1,
                    minInfoGain=0.0,
                    maxMemoryInMB=256,
                    cacheNodeIds=True,
                    checkpointInterval=10,
                    lossType='logistic',
                    maxIter=20,
                    stepSize=0.1,
                    seed=961928988,
                    subsamplingRate=1.0)
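
# (Added sketch, not in the original) 'features_m', 'train_m', and 'test_m'
# are assumed to exist already. One plausible way they could be built from a
# raw DataFrame `df` (hypothetical name, with hypothetical feature columns):
#
#   from pyspark.ml.feature import VectorAssembler
#   prep = VectorAssembler(inputCols=['tenure', 'monthly_charges', 'total_charges'],
#                          outputCol='features_m')
#   train_m, test_m = prep.transform(df).randomSplit([0.8, 0.2], seed=2023)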
# Chain both models in a single Pipeline. Both read the existing vector column
# 'features_m' directly, so no VectorAssembler stage is needed. Note that this
# trains two independent models side by side; their predictions are not
# combined into an ensemble.
pipeline = Pipeline(stages=[rf, gbt])
# Fit the pipeline on your data
model = pipeline.fit(train_m)
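
# (Added note, not in the original) Fitted stages can be inspected
# individually; with stages=[rf, gbt], stage 0 is the trained Random Forest.
rf_model = model.stages[0]
print("RF feature importances:", rf_model.featureImportances)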
# Score train and test with the fitted pipeline (each model appends its own prediction columns)
train_predictions = model.transform(train_m)
test_predictions = model.transform(test_m)
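
# (Added sketch, not in the original) The Pipeline trains the two models
# independently. If an actual soft-voting ensemble is wanted, the positive-class
# probabilities can be averaged after transform(); this assumes Spark >= 3.0
# for pyspark.ml.functions.vector_to_array.
from pyspark.ml.functions import vector_to_array
from pyspark.sql import functions as F
ens_test = (test_predictions
            .withColumn('rf_p1', vector_to_array('rf_prob')[1])
            .withColumn('gbt_p1', vector_to_array('probability')[1])
            .withColumn('ens_prob', (F.col('rf_p1') + F.col('gbt_p1')) / 2))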
# Evaluate the Random Forest outputs. BinaryClassificationEvaluator expects a
# raw-score column, so point it at 'rf_raw_pred' rather than the probability
# vector; each evaluator can be reused for both splits.
evaluator_roc = BinaryClassificationEvaluator(labelCol='churn', rawPredictionCol='rf_raw_pred', metricName='areaUnderROC')
auc_train_roc = evaluator_roc.evaluate(train_predictions)
auc_test_roc = evaluator_roc.evaluate(test_predictions)
# Area under the precision-recall curve (PR-AUC)
evaluator_pr = BinaryClassificationEvaluator(labelCol='churn', rawPredictionCol='rf_raw_pred', metricName='areaUnderPR')
auc_train_pr = evaluator_pr.evaluate(train_predictions)
auc_test_pr = evaluator_pr.evaluate(test_predictions)
# F1-score on the hard predictions
evaluator_f1 = MulticlassClassificationEvaluator(labelCol='churn', predictionCol='rf_pred', metricName='f1')
f1_train = evaluator_f1.evaluate(train_predictions)
f1_test = evaluator_f1.evaluate(test_predictions)
# Print the evaluation results
print("Train ROC-AUC: ", auc_train_roc)
print("Test ROC-AUC: ", auc_test_roc)
print("Train ROC-PR: ", auc_train_pr)
print("Test ROC-PR: ", auc_test_pr)
print("Train F1-score: ", f1_train)
print("Test F1-score: ", f1_test)Editor is loading...