Untitled
unknown
plain_text
2 years ago
1.3 kB
12
Indexable
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Load the training data and separate the features from the target column.
df = pd.read_csv('/datasets/train_data_us.csv')
features = df.drop(['last_price'], axis=1)
# The target is scaled down by 1e6 (presumably to express prices in
# millions -- confirm against the dataset description).
target = df['last_price'] / 1000000

# Hold out 25% of the rows as a validation set; the fixed seed keeps the
# split reproducible between runs.
features_train, features_valid, target_train, target_valid = train_test_split(
    features, target, test_size=0.25, random_state=12345)

# Grid search over forest size and tree depth, keeping the model with the
# lowest validation RMSE.
best_model = None
best_result = float("inf")  # any real RMSE beats this sentinel
best_est = 0
best_depth = 0
for est in range(10, 51, 10):
    for depth in range(1, 11):
        # max_depth must be passed explicitly; otherwise every depth
        # iteration trains the same unbounded-depth forest.
        model = RandomForestRegressor(
            random_state=12345, n_estimators=est, max_depth=depth)
        model.fit(features_train, target_train)  # train on the training set
        predictions_valid = model.predict(features_valid)
        # RMSE is the square root of the mean squared error.
        result = mean_squared_error(target_valid, predictions_valid) ** 0.5
        if result < best_result:
            best_model = model
            best_result = result
            best_est = est
            best_depth = depth

# Report the winning hyperparameters (best_depth, not the last loop value).
print("RMSE of the best model on the validation set:", best_result, "n_estimators:", best_est, "best_depth:", best_depth)
Leave a Comment