# Paste metadata: Untitled · unknown · python · 3 years ago · 1.7 kB · 8 · Indexable
import pandas as pd
from scipy import stats
_CARS_CSV_URL = 'https://think.cs.vt.edu/corgis/datasets/csv/cars/cars.csv'


def download_data(url=_CARS_CSV_URL):
    '''
    Load the cars dataset into a DataFrame.

    Parameters:
        url: Location of the CSV data. Anything ``pd.read_csv`` accepts
            works (URL, local path or file-like object). Defaults to the
            cars CSV referenced by ``_CARS_CSV_URL``.

    Returns:
        A DataFrame holding the parsed contents of the CSV.
    '''
    return pd.read_csv(url)
def feature_engineering(df, make='Honda', fuel_type='Gasoline',
                        horsepower_quantile=0.90):
    '''
    Reduce the dataset to the rows and columns used for the regression.

    Parameters:
        df: DataFrame containing at least the columns 'Make',
            'Fuel Information.Fuel Types', 'Fuel Information.Highway mpg'
            and 'Engine Information.Engine Statistics.Horsepower'.
        make: Value of the 'Make' column to keep.
        fuel_type: Value of the 'Fuel Information.Fuel Types' column to keep.
        horsepower_quantile: Rows whose horsepower is at or above this
            quantile (of the already filtered rows) are dropped as outliers.

    Returns:
        A DataFrame with only the highway mpg and horsepower columns for
        the rows that passed every filter. The original index is kept.
    '''
    hp_col = 'Engine Information.Engine Statistics.Horsepower'
    mpg_col = 'Fuel Information.Highway mpg'

    selected = df[(df['Make'] == make)
                  & (df['Fuel Information.Fuel Types'] == fuel_type)]
    # The cutoff is taken from the selected rows only, so other makes and
    # fuel types do not influence what counts as an outlier.
    cutoff = selected[hp_col].quantile(horsepower_quantile)
    selected = selected[selected[hp_col] < cutoff]
    return selected[[mpg_col, hp_col]]
def normalize_data(df):
    '''
    Min-max normalize every column so that its values lie between 0 and 1.

    Parameters:
        df: DataFrame with numeric columns. It is not modified.

    Returns:
        A new DataFrame where each column is (value - min) / (max - min).
        A column whose values are all identical is mapped to 0 instead of
        NaN.
    '''
    lowest = df.min()
    span = df.max() - lowest
    # A constant column has a zero range; dividing by it would give 0/0 = NaN.
    # The numerator is already 0 for such a column, so dividing by 1 yields 0.
    span = span.where(span != 0, 1)
    return (df - lowest) / span
def linear_regression(df):
    '''
    Fit a least-squares line of highway mpg against horsepower.

    Parameters:
        df: DataFrame with the columns
            'Engine Information.Engine Statistics.Horsepower' (x) and
            'Fuel Information.Highway mpg' (y).

    Returns:
        The tuple (slope, intercept, r_value, p_value, std_err) as computed
        by ``scipy.stats.linregress``.
    '''
    x_col = 'Engine Information.Engine Statistics.Horsepower'
    y_col = 'Fuel Information.Highway mpg'
    # A single NaN would propagate into every returned statistic, so rows
    # with a missing value in either column are dropped before fitting.
    complete = df[[x_col, y_col]].dropna()
    slope, intercept, r_value, p_value, std_err = stats.linregress(
        complete[x_col], complete[y_col])
    return slope, intercept, r_value, p_value, std_err
def main():
    '''
    Run the full pipeline: download the data, reduce and normalize it, fit
    the regression, and print each resulting statistic on its own line.
    '''
    cars = normalize_data(feature_engineering(download_data()))
    slope, intercept, r_value, p_value, std_err = linear_regression(cars)
    report = (
        ('slope:', slope),
        ('intercept:', intercept),
        ('r_value:', r_value),
        ('p_value:', p_value),
        ('std_err:', std_err),
    )
    for label, value in report:
        print(label, value)
# Run the pipeline only when the file is executed as a script.
if __name__ == '__main__':
    main()