# Paste-site metadata (not part of the program):
# "Untitled", author unknown, python, 3 years ago, 1.2 kB, 3, Indexable.
import pandas as pd
def download_data(url=None):
    """Read the CSV dataset into a pandas DataFrame.

    Args:
        url: Optional location of the CSV; anything ``pandas.read_csv``
            accepts (URL, local path, file-like object). When omitted, the
            module-level ``urlData`` is used, as before.

    Returns:
        The parsed CSV as a DataFrame.
    """
    # NOTE(review): ``urlData`` is not defined in the visible part of this
    # file -- confirm it exists at module level, or always pass ``url``.
    source = urlData if url is None else url
    return pd.read_csv(source)
def feature_engineering(df, *, make='Honda', fuel_type='Gasoline',
                        mpg_quantile=0.90):
    """Select one make/fuel type and min-max scale mpg and horsepower.

    Steps:
      1. Keep rows whose fuel type and make match ``fuel_type`` / ``make``.
      2. Drop rows whose highway mpg is at or above the ``mpg_quantile``
         quantile of the remaining rows.
      3. Keep only the highway-mpg and horsepower columns.
      4. Rescale each column with ``(value - min) / (max - min)``.

    Args:
        df: DataFrame containing the columns 'Make',
            'Fuel Information.Fuel Types', 'Fuel Information.Highway mpg'
            and 'Engine Information.Engine Statistics.Horsepower'.
        make: Value of 'Make' to keep.
        fuel_type: Value of 'Fuel Information.Fuel Types' to keep.
        mpg_quantile: Quantile (0..1) of highway mpg used as the exclusive
            upper cut-off.

    Returns:
        A new two-column DataFrame (highway mpg, horsepower) of scaled
        values. A column with no spread is returned as all zeros.
    """
    mpg_col = 'Fuel Information.Highway mpg'
    hp_col = 'Engine Information.Engine Statistics.Horsepower'

    matches = (df['Fuel Information.Fuel Types'] == fuel_type) & (df['Make'] == make)
    subset = df[matches]

    # The cut-off is computed on the already-filtered rows, not the full data.
    cutoff = subset[mpg_col].quantile(mpg_quantile)
    features = subset.loc[subset[mpg_col] < cutoff, [mpg_col, hp_col]]

    lowest = features.min()
    span = features.max() - lowest
    # A constant column has max == min; dividing by that zero range would
    # yield NaN (0/0). Divide by 1 instead so the column becomes 0.0.
    span = span.where(span != 0, 1)
    return (features - lowest) / span
def linear_regression(x, y):
    """Fit a linear regression on an 80/20 split and print its metrics.

    Args:
        x: pandas Series (single feature) or DataFrame (several features)
            holding the explanatory variable(s).
        y: pandas Series holding the target.

    Prints the test-set predictions, the fitted coefficient(s) and
    intercept, the mean squared error and the R2 score. Returns nothing.
    """
    # NOTE(review): train_test_split, LinearRegression, mean_squared_error
    # and r2_score are not imported in the visible part of this file --
    # confirm they are brought into scope from scikit-learn.
    features = x.values
    target = y.values

    # scikit-learn estimators expect a 2-D (n_samples, n_features) matrix.
    # A single Series yields a 1-D array, which fit()/predict() reject, so
    # promote it to one feature column.
    if features.ndim == 1:
        features = features.reshape(-1, 1)

    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=0
    )

    model = LinearRegression()
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    print('Predicted values: ', y_pred)
    print('Coefficient: ', model.coef_)
    print('Intercept: ', model.intercept_)
    print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))
    print('R2 score: %.2f' % r2_score(y_test, y_pred))
if __name__ == '__main__':
    # Script entry point: load the data, filter and scale it, then regress
    # horsepower (target) on highway mpg (feature).
    df = download_data()
    df = feature_engineering(df)
    linear_regression(
        df['Fuel Information.Highway mpg'],
        df['Engine Information.Engine Statistics.Horsepower'],
    )