# Untitled: simple linear regression of exam Scores on study Hours (student_scores.csv)

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the hours/scores table and preview its first rows.
df = pd.read_csv('student_scores.csv')
print(df.head())

# Scatter plot drawn through the pandas plotting accessor (used here in place
# of calling plt.scatter / plt.title directly).
df.plot.scatter(
    x='Hours',
    y='Scores',
    title='Scatterplot of hours and scores percentages',
)

# Pairwise correlation, then summary statistics of the columns.
correlations = df.corr()
print(correlations)
summary_stats = df.describe()
print(summary_stats)

# Pull each column out as a flat NumPy array, then reshape(-1, 1) to get a
# single-column 2-D array (one row per sample).
hours_flat = df['Hours'].values
scores_flat = df['Scores'].values
X = hours_flat.reshape(-1, 1)
y = scores_flat.reshape(-1, 1)

print('X shape:', X.shape)
print('X:', X)
# For comparison, the un-reshaped column and its 1-D shape.
print(hours_flat)  # e.g. [2.5 5.1 3.2 8.5 3.5 1.5 9.2 ... ]
print(hours_flat.shape)  # e.g. (25,)
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(y_train)
from sklearn.linear_model import LinearRegression
# Fit an ordinary least-squares line to the training rows.
regressor = LinearRegression()
regressor.fit(X_train, y_train)
# Report the fitted parameters. A bare `regressor.intercept_` expression only
# displays a value in a notebook/REPL; in a script it must be printed.
print('Intercept:', regressor.intercept_)
print('Slope:', regressor.coef_)
def calc(slope, intercept, hours):
    """Evaluate the straight line ``slope * hours + intercept``.

    Args:
        slope: Multiplier applied to ``hours``.
        intercept: Offset added to the scaled value.
        hours: Point at which the line is evaluated.

    Returns:
        The result of ``slope * hours + intercept``. Plain numbers give a
        number; NumPy arrays follow the usual broadcasting rules.
    """
    scaled = slope * hours
    return scaled + intercept
# Predict the score for one example value of study hours in two ways:
# first by evaluating the fitted line by hand, then via the model itself.
hours_studied = 9.5
score = calc(regressor.coef_, regressor.intercept_, hours_studied)
print(score)
# predict() takes a 2-D array of samples, hence the nested list.
score = regressor.predict([[hours_studied]])
print(score)

# Compare predictions with the held-out true values side by side.
y_pred = regressor.predict(X_test)
# ravel() always yields 1-D arrays. squeeze() would collapse a single-row
# test set to a 0-d scalar, which the DataFrame constructor rejects.
df_preds = pd.DataFrame({'Actual': y_test.ravel(), 'Predicted': y_pred.ravel()})
print(df_preds)
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Error metrics of the predictions against the held-out true values.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
# RMSE is the square root of MSE, so it is in the same units as the target.
rmse = np.sqrt(mse)
# Report the metrics; previously they were computed and then discarded.
print('Mean absolute error:', mae)
print('Mean squared error:', mse)
print('Root mean squared error:', rmse)
# (End of pasted snippet; "Editor is loading..." was web-editor residue, not code.)