Untitled
unknown
plain_text
3 years ago
1.4 kB
10
Indexable
"""Fit a simple linear regression of exam scores on hours studied.

Reads a CSV file with ``Hours`` and ``Scores`` columns, prints a few
exploratory statistics, trains a scikit-learn ``LinearRegression`` on a
train/test split and reports the prediction errors on the held-out rows.
"""

import numpy as np
import pandas as pd

CSV_PATH = 'student_scores.csv'
TEST_SIZE = 0.2      # fraction of rows held out for evaluation
RANDOM_STATE = 42    # fixed seed so the split is reproducible
SAMPLE_HOURS = 9.5   # study time used for the example prediction


def calc(slope, intercept, hours):
    """Return the value of the line ``slope * hours + intercept``.

    Works for plain numbers and for NumPy arrays (the usual broadcasting
    rules of ``*`` and ``+`` apply).
    """
    return slope * hours + intercept


def main(csv_path=CSV_PATH):
    """Run the whole analysis: explore the data, fit, predict, evaluate.

    Args:
        csv_path: Path of the CSV file to load. It must provide the
            ``Hours`` and ``Scores`` columns.
    """
    # Plotting / ML libraries are imported here, right before use, so that
    # importing this module (e.g. to reuse ``calc``) does not require them.
    import matplotlib.pyplot as plt
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    from sklearn.model_selection import train_test_split

    df = pd.read_csv(csv_path)
    print(df.head())

    # Same picture as plt.scatter(df['Hours'], df['Scores']) + plt.title(...).
    df.plot.scatter(
        x='Hours',
        y='Scores',
        title='Scatterplot of hours and scores percentages',
    )

    print(df.corr())
    print(df.describe())

    # scikit-learn expects 2-D inputs, hence the reshape to one column.
    y = df['Scores'].values.reshape(-1, 1)
    X = df['Hours'].values.reshape(-1, 1)

    print('X shape:', X.shape)
    print('X:', X)
    print(df['Hours'].values)        # e.g. [2.5 5.1 3.2 8.5 3.5 1.5 9.2 ... ]
    print(df['Hours'].values.shape)  # e.g. (25,)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
    )
    print(y_train)

    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    # A bare ``regressor.intercept_`` only echoes in a notebook; print it.
    print('Intercept:', regressor.intercept_)
    print('Slope:', regressor.coef_)

    # Manual prediction from the fitted line. ``coef_`` and ``intercept_``
    # are NumPy arrays, so the result is an array rather than a plain float.
    score = calc(regressor.coef_, regressor.intercept_, SAMPLE_HOURS)
    print(score)

    # The same prediction through the estimator API.
    score = regressor.predict([[SAMPLE_HOURS]])
    print(score)

    y_pred = regressor.predict(X_test)
    df_preds = pd.DataFrame(
        {'Actual': y_test.squeeze(), 'Predicted': y_pred.squeeze()}
    )
    print(df_preds)

    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    print(f'Mean absolute error: {mae:.2f}')
    print(f'Mean squared error: {mse:.2f}')
    print(f'Root mean squared error: {rmse:.2f}')

    # Without an explicit show() the figure is never displayed when this
    # runs as a script; called last so it does not hold up the text output.
    plt.show()


if __name__ == '__main__':
    main()
Editor is loading...