Untitled

 avatar
unknown
python
2 years ago
1.7 kB
5
Indexable
import pandas as pd

def download_data(url='https://think.cs.vt.edu/corgis/datasets/csv/cars/cars.csv'):
    '''
    Load the cars dataset as a DataFrame.

    Parameters:
        url: Location of the CSV data. Defaults to the CORGIS cars dataset.
            The value is handed straight to pandas.read_csv, so a local
            path or an open file-like object is accepted as well.

    Returns:
        The DataFrame produced by pandas.read_csv for the given source.
    '''
    return pd.read_csv(url)

def feature_engineering(df, *, make='Honda', fuel_type='Gasoline', hp_quantile=0.90):
    '''
    Reduce the dataset to one make and fuel type, drop the highest-horsepower
    rows, and keep only the highway mpg and horsepower columns.

    Parameters:
        df: DataFrame containing the 'Make', 'Fuel Information.Fuel Types',
            'Fuel Information.Highway mpg' and
            'Engine Information.Engine Statistics.Horsepower' columns.
        make: Value of the 'Make' column to keep.
        fuel_type: Value of the 'Fuel Information.Fuel Types' column to keep.
        hp_quantile: Rows whose horsepower is at or above this quantile of
            the already-filtered rows are removed.

    Returns:
        A DataFrame with the highway mpg and horsepower columns (in that
        order) for the rows that survive all three filters.
    '''
    hp_col = 'Engine Information.Engine Statistics.Horsepower'
    mpg_col = 'Fuel Information.Highway mpg'

    wanted = (df['Make'] == make) & (df['Fuel Information.Fuel Types'] == fuel_type)
    subset = df[wanted]

    # The cutoff is computed on the filtered rows, not on the full dataset,
    # so it reflects the horsepower distribution of the selected cars only.
    cutoff = subset[hp_col].quantile(hp_quantile)
    return subset.loc[subset[hp_col] < cutoff, [mpg_col, hp_col]]

def normalize_data(df):
    '''
    Min-max scale the data so that every column lies between 0 and 1.

    Each column has its minimum subtracted and is then divided by its range
    (max - min). A column whose values are all identical has a range of
    zero; its range is treated as 1 so the column becomes all zeros instead
    of NaN from a 0/0 division.

    Parameters:
        df: Numeric DataFrame (a Series is also accepted).

    Returns:
        A new object of the same shape holding the scaled values.
    '''
    lowest = df.min()
    span = df.max() - lowest

    # Guard against division by zero for constant columns.
    if isinstance(span, pd.Series):
        span = span.where(span != 0, 1)
    elif span == 0:
        span = 1

    return (df - lowest) / span

def linear_regression(df):
    '''
    Fit a simple linear regression of highway mpg on horsepower.

    Parameters:
        df: DataFrame containing the 'Fuel Information.Highway mpg' and
            'Engine Information.Engine Statistics.Horsepower' columns.

    Returns:
        The tuple (slope, intercept, r_value, p_value, std_err) produced by
        scipy.stats.linregress, with horsepower as x and highway mpg as y.
    '''
    # Imported here because the module never imports scipy at file level;
    # without it the `stats` name below is undefined.
    from scipy import stats

    y = df['Fuel Information.Highway mpg']
    x = df['Engine Information.Engine Statistics.Horsepower']
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    return slope, intercept, r_value, p_value, std_err

def main():
    '''
    Run the whole pipeline: download, filter, normalize, fit, and print the
    five regression statistics one per line.
    '''
    prepared = normalize_data(feature_engineering(download_data()))
    slope, intercept, r_value, p_value, std_err = linear_regression(prepared)

    report = (
        ('slope:', slope),
        ('intercept:', intercept),
        ('r_value:', r_value),
        ('p_value:', p_value),
        ('std_err:', std_err),
    )
    for label, value in report:
        print(label, value)
    
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Editor is loading...