Untitled
unknown
plain_text
21 days ago
2.9 kB
2
Indexable
Never
# Train a decision tree on banded driver/licence/policy ages to predict the
# 'No Claims' column, persist the model, then apply it to a second CSV that
# has no 'No Claims' column and write the predictions next to the inputs.
import pandas as pd
import joblib  # Import joblib directly
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
import matplotlib.pyplot as plt

# Banded (categorical) feature columns derived from the raw age columns.
CLASS_COLUMNS = ['Age of Driver Class', 'Age of Licence Class', 'Age of Policy Class']


# Define Age of Driver Classification
def classify_age_of_driver(age):
    """Return the driver-age band: '<25', '25-74' or '75+'."""
    # NOTE(review): comparisons with NaN are always False, so a missing age
    # falls through to '75+'. Confirm the input files have no missing ages.
    if age < 25:
        return '<25'
    elif 25 <= age <= 74:
        return '25-74'
    else:
        return '75+'


# Define Age of Licence Classification
def classify_age_of_licence(age):
    """Return the licence-age band: '<4' or '4+'."""
    return '<4' if age < 4 else '4+'


# Define Age of Policy Classification
def classify_age_of_policy(age):
    """Return the policy-age band: '<1' or '1+'."""
    return '<1' if age < 1 else '1+'


def add_age_classes(frame):
    """Add the three banded age columns to *frame* in place and return it.

    Shared by the training and prediction steps so both files are banded
    by exactly the same rules.
    """
    frame['Age of Driver Class'] = frame['Age of Driver'].apply(classify_age_of_driver)
    frame['Age of Licence Class'] = frame['Age of Licence'].apply(classify_age_of_licence)
    frame['Age of Policy Class'] = frame['Age of Policy'].apply(classify_age_of_policy)
    return frame


# Load the training data
data = pd.read_csv('C:/Users/Panos/Downloads/training_data_car_ins.csv')

# Apply the classifications to the training data
add_age_classes(data)

# Prepare the feature and target variables
X = pd.get_dummies(data[CLASS_COLUMNS], drop_first=True)
y = data['No Claims']

# Train the model using the entire dataset
model = DecisionTreeClassifier(random_state=42)
model.fit(X, y)

# Save the trained model to a file
joblib.dump(model, 'decision_tree_model.pkl')
print("Model saved.")

# ----------------------------
# Now for applying the model
# ----------------------------

# Load the new dataset (where 'No Claims' is missing)
new_data = pd.read_csv('C:/Users/Panos/Downloads/test_data_car_ins.csv')

# Preprocess the new data
add_age_classes(new_data)

# Load the trained model from the file
model = joblib.load('decision_tree_model.pkl')

# Column names (and order) the model was fitted on
training_columns = model.feature_names_in_

# Encode the new data WITHOUT drop_first, then select the training columns.
# With drop_first=True the dropped baseline is whichever category happens to
# sort first in this particular file; if the training baseline is absent
# here, a different level would be dropped and those rows would be encoded
# as the training baseline. Reindexing to the training columns keeps the
# same baseline, adds any category missing from this file as an all-zero
# column, and fixes the column order.
X_new = pd.get_dummies(new_data[CLASS_COLUMNS]).reindex(
    columns=training_columns, fill_value=0
)

# Predict the 'No Claims' column using the trained model
new_data['Predicted No Claims'] = model.predict(X_new)

# Save the predictions to a new CSV file
new_data.to_csv('C:/Users/Panos/Downloads/new_car_insurance_data_with_predictions.csv', index=False)
print("Predictions saved to new CSV file.")
Leave a Comment