Untitled

 avatar
unknown
python
2 years ago
1.2 kB
2
Indexable
import pandas as pd

def download_data(url=None):
    """Read the dataset CSV into a DataFrame.

    Args:
        url: Source handed straight to ``pandas.read_csv`` (a URL, a local
            path, or an open file-like object). When omitted, the
            module-level name ``urlData`` is used, which was the only
            behavior before this parameter existed.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.

    Raises:
        NameError: If ``url`` is omitted and ``urlData`` is not defined.
    """
    if url is None:
        # NOTE(review): ``urlData`` is not defined in the visible part of the
        # file -- confirm it is set at module level before calling without
        # an explicit ``url``.
        url = urlData
    return pd.read_csv(url)

def feature_engineering(df):
    """Filter the rows of interest and min-max scale the two model columns.

    Steps, in order:
      1. Keep rows whose 'Fuel Information.Fuel Types' is 'Gasoline'.
      2. Keep rows whose 'Make' is 'Honda'.
      3. Drop rows whose highway mpg is at or above the 90th percentile,
         computed over the rows that survived steps 1-2.
      4. Keep only the highway-mpg and horsepower columns.
      5. Rescale each column to the [0, 1] range with (value - min) / (max - min).

    Args:
        df: DataFrame containing the columns 'Fuel Information.Fuel Types',
            'Make', 'Fuel Information.Highway mpg' and
            'Engine Information.Engine Statistics.Horsepower'.

    Returns:
        A new two-column DataFrame (highway mpg, horsepower) holding the
        scaled values; the surviving rows keep their original index labels.
    """
    mpg_col = 'Fuel Information.Highway mpg'
    hp_col = 'Engine Information.Engine Statistics.Horsepower'

    df = df[df['Fuel Information.Fuel Types'] == 'Gasoline']
    df = df[df['Make'] == 'Honda']
    # The cutoff is taken after the make/fuel filters so that other vehicles
    # do not influence what counts as an outlier here.
    df = df[df[mpg_col] < df[mpg_col].quantile(0.90)]
    df = df.loc[:, [mpg_col, hp_col]]

    lowest = df.min()
    span = df.max() - lowest
    # A constant column has a zero span; dividing by it would turn the whole
    # column into NaN (0/0). Treat that span as 1 so the column scales to 0.0.
    span = span.where(span != 0, 1)
    return (df - lowest) / span

def linear_regression(x, y):
    """Fit a linear model of ``y`` on ``x`` and print predictions and metrics.

    The data is split 80/20 into train and test parts with a fixed seed, the
    model is fitted on the training part, and the following are printed for
    the test part: predicted values, coefficient(s), intercept, mean squared
    error and R2 score.

    Args:
        x: Feature data exposing ``.values`` (e.g. a pandas Series or
            DataFrame). A one-dimensional input is treated as a single
            feature.
        y: Target data exposing ``.values``.

    Returns:
        None. Results are written to standard output.
    """
    # NOTE(review): train_test_split, LinearRegression, mean_squared_error and
    # r2_score have no visible import in this file; presumably they come from
    # scikit-learn -- confirm they are imported at module level.
    x = x.values
    y = y.values
    # A single Series gives a 1-D array, but the estimator expects a 2-D
    # feature matrix of shape (n_samples, n_features); promote it to one column.
    if x.ndim == 1:
        x = x.reshape(-1, 1)

    # random_state is fixed so the split is the same on every run.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

    model = LinearRegression()
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    print('Predicted values: ', y_pred)
    print('Coefficient: ', model.coef_)
    print('Intercept: ', model.intercept_)
    print('Mean squared error: %.2f'
        % mean_squared_error(y_test, y_pred))
    print('R2 score: %.2f' % r2_score(y_test, y_pred))

if __name__ == '__main__':
    # Script entry point: load the data, reduce it to the scaled
    # mpg/horsepower pair, then regress horsepower on highway mpg.
    df = feature_engineering(download_data())
    highway_mpg = df['Fuel Information.Highway mpg']
    horsepower = df['Engine Information.Engine Statistics.Horsepower']
    linear_regression(highway_mpg, horsepower)
Editor is loading...