Untitled
unknown
python
3 years ago
3.0 kB
7
Indexable
%%writefile {train_src_dir}/main.py import os import argparse import pandas as pd import mlflow import mlflow.sklearn from sklearn.ensemble import GradientBoostingClassifier from sklearn.metrics import classification_report from sklearn.model_selection import train_test_split def main(): """Main function of the script.""" # input and output arguments parser = argparse.ArgumentParser() parser.add_argument("--data", type=str, help="path to input data") parser.add_argument("--test_train_ratio", type=float, required=False, default=0.25) parser.add_argument("--n_estimators", required=False, default=100, type=int) parser.add_argument("--learning_rate", required=False, default=0.1, type=float) parser.add_argument("--registered_model_name", type=str, help="model name") args = parser.parse_args() # Start Logging mlflow.start_run() # enable autologging mlflow.sklearn.autolog() ################### #<prepare the data> ################### print(" ".join(f"{k}={v}" for k, v in vars(args).items())) print("input data:", args.data) credit_df = pd.read_excel(args.data, header=1, index_col=0) mlflow.log_metric("num_samples", credit_df.shape[0]) mlflow.log_metric("num_features", credit_df.shape[1] - 1) train_df, test_df = train_test_split( credit_df, test_size=args.test_train_ratio, ) #################### #</prepare the data> #################### ################## #<train the model> ################## # Extracting the label column y_train = train_df.pop("default payment next month") # convert the dataframe values to array X_train = train_df.values # Extracting the label column y_test = test_df.pop("default payment next month") # convert the dataframe values to array X_test = test_df.values print(f"Training with data of shape {X_train.shape}") clf = GradientBoostingClassifier( n_estimators=args.n_estimators, learning_rate=args.learning_rate ) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print(classification_report(y_test, y_pred)) ################### #</train the model> ################### ########################## #<save and register model> ########################## # Registering the model to the workspace print("Registering the model via MLFlow") mlflow.sklearn.log_model( sk_model=clf, registered_model_name=args.registered_model_name, artifact_path=args.registered_model_name, ) # Saving the model to a file mlflow.sklearn.save_model( sk_model=clf, path=os.path.join(args.registered_model_name, "trained_model"), ) ########################### #</save and register model> ########################### # Stop Logging mlflow.end_run() if __name__ == "__main__": main()
Editor is loading...