Untitled
unknown
plain_text
2 years ago
1.3 kB
9
Indexable
# Grid-search a random forest regressor over n_estimators and max_depth,
# keeping the model with the lowest RMSE on a held-out validation split.
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

df = pd.read_csv('/datasets/train_data_us.csv')

# Every column except 'last_price' is a feature; the target is 'last_price'
# divided by 1,000,000, so the reported RMSE is in those scaled units.
features = df.drop(['last_price'], axis=1)
target = df['last_price'] / 1000000

# Hold out 25% of the rows as a validation set (fixed seed for repeatability).
features_train, features_valid, target_train, target_valid = train_test_split(
    features, target, test_size=0.25, random_state=12345
)

best_model = None
# Start from +inf so the first trained model always becomes the initial best,
# regardless of the scale of the target.
best_result = float('inf')
best_est = 0
best_depth = 0

for est in range(10, 51, 10):
    for depth in range(1, 11):
        # max_depth must be passed explicitly; without it every iteration of
        # the depth loop would train the same unrestricted forest.
        model = RandomForestRegressor(
            random_state=12345, n_estimators=est, max_depth=depth
        )
        model.fit(features_train, target_train)
        predictions_valid = model.predict(features_valid)
        # RMSE = square root of the mean squared error on the validation set.
        result = mean_squared_error(target_valid, predictions_valid) ** 0.5
        if result < best_result:
            best_model = model
            best_result = result
            best_est = est
            best_depth = depth

# Report the depth of the best model, not the last value of the loop variable.
print(
    "RMSE of the best model on the validation set:", best_result,
    "n_estimators:", best_est,
    "best_depth:", best_depth,
)
Editor is loading...
Leave a Comment