Untitled

mail@pastecode.io avatar
unknown
python
22 days ago
2.2 kB
3
Indexable
Never
import pandas as pd
from sklearn.tree import DecisionTreeRegressor

# Read the training CSV, parsing the pickup timestamp column as datetimes.
train = pd.read_csv(
    'train_2M.csv',
    parse_dates=['pickup_datetime'],
)

def add_travel_vector_features(df):
    """Add absolute pickup-to-dropoff coordinate deltas to *df* in place.

    Creates (or overwrites) the columns ``abs_diff_longitude`` and
    ``abs_diff_latitude`` from the ``pickup_*`` / ``dropoff_*`` longitude
    and latitude columns. Nothing is returned; the caller's frame is
    modified directly.
    """
    # Longitude first, then latitude, so new columns are appended in that order.
    for axis in ('longitude', 'latitude'):
        delta = df[f'dropoff_{axis}'] - df[f'pickup_{axis}']
        df[f'abs_diff_{axis}'] = delta.abs()

# Derive the absolute longitude/latitude deltas on the training frame (in place).
add_travel_vector_features(train)

# Discard every row that has at least one missing value.
train = train.dropna(axis=0, how='any')

# Treat coordinate deltas of 5.0 or more as unrealistic and discard those rows.
train = train.loc[
    train['abs_diff_longitude'].lt(5.0) & train['abs_diff_latitude'].lt(5.0)
]

# Prepare features and target.
# The target column is removed from the feature matrix. 'key' is removed as
# well: it is a row identifier, not a predictor, and any non-numeric key text
# would be turned into NaN by the coercion below, leaving a column with no
# signal that estimators which reject NaN cannot be fitted on.
# errors='ignore' makes the second drop a no-op if 'key' is not present.
X = train.drop('fare_amount', axis=1).drop(columns='key', errors='ignore')
y = train['fare_amount']

# Convert all remaining columns in X to numeric; values that cannot be parsed
# become NaN (errors='coerce').
X = X.apply(pd.to_numeric, errors='coerce')

# Load and prepare test data
test = pd.read_csv('test.csv')

# Keep the identifiers for the submission file. They stay aligned with the
# predictions only because no test rows are removed below.
key = test.key

# Add features to test data
add_travel_vector_features(test)

# NOTE: unlike the training data, test rows are deliberately NOT filtered by
# coordinate delta. `key` was captured from the full frame, so dropping rows
# here would leave it longer than the prediction array (the submission
# DataFrame could then not be built) and the submission would lack rows.

# Convert datetime to numeric
test['pickup_datetime'] = pd.to_numeric(pd.to_datetime(test['pickup_datetime']))

# Select exactly the training feature columns, in the same order, so the
# model sees the layout it was fitted on.
test = test[X.columns]

# Convert all columns in test to numeric; unparseable values become NaN.
test = test.apply(pd.to_numeric, errors='coerce')

# Fit a regression tree limited to `optimal_depth` levels on the training
# features and target (fit() returns the fitted estimator itself).
optimal_depth = 9
dtr = DecisionTreeRegressor(max_depth=optimal_depth).fit(X, y)

# Predict a fare for every prepared test row.
test_predictions = dtr.predict(test)

# Pair each saved test key with its predicted fare, storing the key as a string.
submission = pd.DataFrame({'key': key, 'fare_amount': test_predictions})
submission = submission.astype({'key': str})

# Show the column dtypes as a quick sanity check.
print(submission.dtypes)

# Write the submission CSV without the index column.
submission.to_csv('final_submission.csv', index=False)

print("Submission file created successfully!")

# Submit final_submission.csv to the Kaggle competition via the kaggle CLI.
# NOTE: the leading "!" is IPython/Jupyter shell-escape syntax, so this line
# only runs inside a notebook or IPython session; it is a syntax error when
# the file is executed as a plain Python script.
!kaggle competitions submit -c new-york-city-taxi-fare-prediction -f final_submission.csv -m "Final submission test"
# Print the score. The value is a hard-coded literal typed into the script;
# it is not read back from the submission made above.

print(f"My new score is {3.54843}")
Leave a Comment