Untitled
unknown
python
a year ago
2.2 kB
11
Indexable
import subprocess

import pandas as pd
from sklearn.tree import DecisionTreeRegressor
# Read the training sample from disk; the pickup timestamp column is parsed
# into datetimes while loading.
train = pd.read_csv(
    filepath_or_buffer='train_2M.csv',
    parse_dates=["pickup_datetime"],
)
def add_travel_vector_features(df: pd.DataFrame) -> None:
    """Add absolute pickup-to-dropoff coordinate deltas to ``df`` in place.

    Two columns are written onto the frame that was passed in:

    * ``abs_diff_longitude`` = ``|dropoff_longitude - pickup_longitude|``
    * ``abs_diff_latitude``  = ``|dropoff_latitude - pickup_latitude|``

    Args:
        df: Frame holding ``pickup_longitude``, ``pickup_latitude``,
            ``dropoff_longitude`` and ``dropoff_latitude`` columns.

    Returns:
        None. The caller's frame is mutated.

    Raises:
        KeyError: If one of the four coordinate columns is missing.
    """
    df['abs_diff_longitude'] = (df['dropoff_longitude'] - df['pickup_longitude']).abs()
    df['abs_diff_latitude'] = (df['dropoff_latitude'] - df['pickup_latitude']).abs()
# Add the coordinate-delta features to the training frame (mutates it in place)
add_travel_vector_features(train)
# Discard every row that has at least one missing value
train = train.dropna(how='any', axis='rows')
# Keep only rows whose longitude and latitude deltas are both below 5.0;
# larger jumps are treated as unrealistic records
train = train[(train.abs_diff_longitude < 5.0) & (train.abs_diff_latitude < 5.0)]
# Separate the target from the features.
# 'key' is presumably a non-numeric row identifier rather than a predictor:
# left in, the numeric coercion below would turn it into NaN values that the
# model either rejects or can never split on, so it is excluded here.
# errors='ignore' tolerates a training file that ships without a 'key' column.
X = train.drop(columns=['fare_amount', 'key'], errors='ignore')
y = train['fare_amount']
# Coerce every feature column to a numeric dtype; unparseable values become NaN
X = X.apply(pd.to_numeric, errors='coerce')
# Load the test set and remember its row identifiers for the submission
test = pd.read_csv('test.csv')
key = test.key
# Add the same coordinate-delta features that the model was trained on
add_travel_vector_features(test)
# NOTE: test rows are deliberately NOT filtered for unrealistic values.
# `key` above holds one identifier per original row, so dropping rows here
# would leave it longer than the prediction array and building the submission
# frame would fail; a submission is also expected to cover every key.
# Convert the pickup timestamp to its integer representation
test['pickup_datetime'] = pd.to_numeric(pd.to_datetime(test['pickup_datetime']))
# Select exactly the training feature columns, in the training column order
test = test[X.columns]
# Coerce every column to a numeric dtype; unparseable values become NaN
test = test.apply(pd.to_numeric, errors='coerce')
# Train a depth-limited decision tree regressor on the prepared features
optimal_depth = 9
# random_state is pinned so repeated runs grow the same tree; without it
# scikit-learn may break ties between equally good splits differently per run,
# which makes a reported score impossible to reproduce exactly.
dtr = DecisionTreeRegressor(max_depth=optimal_depth, random_state=0)
dtr.fit(X, y)
# Predict fares for the prepared test rows
test_predictions = dtr.predict(test)
# Assemble the two-column submission table: row identifier and predicted fare
submission = pd.DataFrame({'key': key, 'fare_amount': test_predictions})
# Cast the identifier column to str
submission = submission.astype({'key': str})
# Show the resulting column dtypes as a sanity check
print(submission.dtypes)
# Write the table to disk without the index column
submission.to_csv(path_or_buf='final_submission.csv', index=False)
print("Submission file created successfully!")
# Submit the file through the Kaggle CLI.
# The command is passed as an argument list (no shell involved), which also
# keeps this file valid Python outside IPython. check=True raises
# CalledProcessError when the CLI exits non-zero, so a failed upload is not
# followed by a success-looking score line.
subprocess.run(
    [
        'kaggle', 'competitions', 'submit',
        '-c', 'new-york-city-taxi-fare-prediction',
        '-f', 'final_submission.csv',
        '-m', 'Final submission test',
    ],
    check=True,
)
# NOTE: this score is a hard-coded value, not something read back from Kaggle;
# update it by hand after checking the result of the submission.
recorded_score = 3.54843
print(f"My new score is {recorded_score}")
Editor is loading...
Leave a Comment