mail@pastecode.io avatar
6 months ago
1.3 kB
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Train random-forest regressors over a small grid of n_estimators x max_depth
# and report the combination with the lowest RMSE on a held-out validation set.
df = pd.read_csv('/datasets/train_data_us.csv')

# Every column except 'last_price' is used as a feature. The target is
# 'last_price' divided by 1,000,000 (presumably to express prices in
# millions -- confirm against the dataset description).
features = df.drop(['last_price'], axis=1)
target = df['last_price'] / 1000000

# Hold out 25% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
features_train, features_valid, target_train, target_valid = train_test_split(
    features, target, test_size=0.25, random_state=12345)

best_model = None
# Start from +inf so the first trained model always becomes the initial best,
# regardless of the scale of the target.
best_result = float('inf')
best_est = 0
best_depth = 0
for est in range(10, 51, 10):       # n_estimators: 10, 20, 30, 40, 50
    for depth in range(1, 11):      # max_depth: 1 .. 10
        # max_depth must be passed explicitly; without it every iteration of
        # the inner loop would train the same unrestricted-depth forest.
        model = RandomForestRegressor(
            random_state=12345, n_estimators=est, max_depth=depth)
        model.fit(features_train, target_train)
        predictions_valid = model.predict(features_valid)
        # RMSE is the square root of the mean squared error.
        result = mean_squared_error(target_valid, predictions_valid) ** 0.5
        if result < best_result:
            best_model = model
            best_result = result
            best_est = est
            best_depth = depth

# Report best_depth (the depth of the winning model), not the loop variable,
# which always holds the last depth tried.
print("RMSE of the best model on the validation set:", best_result, "n_estimators:", best_est, "best_depth:", best_depth)
Leave a Comment