Untitled
unknown
python
2 years ago
1.7 kB
5
Indexable
import pandas as pd
from scipy import stats

# Source of the CORGIS "cars" dataset used by download_data().
DATA_URL = 'https://think.cs.vt.edu/corgis/datasets/csv/cars/cars.csv'

# Column names of the CORGIS cars CSV that this script relies on.
MAKE_COLUMN = 'Make'
FUEL_TYPE_COLUMN = 'Fuel Information.Fuel Types'
MPG_COLUMN = 'Fuel Information.Highway mpg'
HORSEPOWER_COLUMN = 'Engine Information.Engine Statistics.Horsepower'


def download_data(url: str = DATA_URL) -> pd.DataFrame:
    '''
    Download the cars CSV and load it into a DataFrame.

    Parameters:
        url: Location of the CSV file; defaults to DATA_URL.

    Returns:
        The parsed CSV as a DataFrame.
    '''
    return pd.read_csv(url)


def feature_engineering(
    df: pd.DataFrame,
    *,
    make: str = 'Honda',
    fuel_type: str = 'Gasoline',
    horsepower_quantile: float = 0.90,
) -> pd.DataFrame:
    '''
    Reduce the dataset by removing unnecessary rows, outliers and columns.

    Rows are kept only if they match `make` and `fuel_type`, and if their
    horsepower is strictly below the `horsepower_quantile` quantile of the
    rows that survived those two filters.

    Parameters:
        df: Frame containing the make, fuel-type, highway-mpg and
            horsepower columns named by the module constants.
        make: Value the make column must equal.
        fuel_type: Value the fuel-type column must equal.
        horsepower_quantile: Quantile (0..1) used as the exclusive upper
            horsepower bound.

    Returns:
        A DataFrame holding only the highway-mpg and horsepower columns,
        in that order.
    '''
    df = df[df[MAKE_COLUMN] == make]
    df = df[df[FUEL_TYPE_COLUMN] == fuel_type]

    # The cut-off is computed AFTER the make/fuel filters so that it
    # describes the subset being analysed, not the whole dataset.
    horsepower_limit = df[HORSEPOWER_COLUMN].quantile(horsepower_quantile)
    df = df[df[HORSEPOWER_COLUMN] < horsepower_limit]

    return df[[MPG_COLUMN, HORSEPOWER_COLUMN]]


def normalize_data(df: pd.DataFrame) -> pd.DataFrame:
    '''
    Min-max normalize every column so that its values lie between 0 and 1.

    A column whose values are all identical has a zero range; it is mapped
    to 0.0 instead of being divided by zero (which would yield NaN).

    Parameters:
        df: Frame of numeric columns.

    Returns:
        A new DataFrame of the same shape with each column rescaled.
    '''
    lowest = df.min()
    span = df.max() - lowest
    # Avoid 0/0 -> NaN for constant columns: (x - min) is already 0 there,
    # so dividing by 1 leaves the column at 0.0.
    span = span.replace(0, 1)
    return (df - lowest) / span


def linear_regression(df: pd.DataFrame):
    '''
    Fit highway mpg as a linear function of horsepower.

    Parameters:
        df: Frame containing the highway-mpg and horsepower columns.

    Returns:
        The tuple (slope, intercept, r_value, p_value, std_err) produced
        by scipy.stats.linregress with horsepower as x and mpg as y.
    '''
    y = df[MPG_COLUMN]
    x = df[HORSEPOWER_COLUMN]
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    return slope, intercept, r_value, p_value, std_err


def main() -> None:
    '''Download, filter, normalize and regress the data, printing the fit.'''
    df = download_data()
    df = feature_engineering(df)
    df = normalize_data(df)
    slope, intercept, r_value, p_value, std_err = linear_regression(df)
    print('slope:', slope)
    print('intercept:', intercept)
    print('r_value:', r_value)
    print('p_value:', p_value)
    print('std_err:', std_err)


if __name__ == '__main__':
    main()
Editor is loading...