Untitled
unknown
java
a year ago
1.1 kB
2
Indexable
# Fit a linear regression on the Fish dataset: column 1 of the CSV is the
# regression target and the remaining columns (species code plus the other
# numeric columns) are the features. The script prints a few sanity checks,
# trains the model, reports R^2 on a held-out split, and shows a scatter plot
# of Weight against the encoded species.

import pandas as pd

# Location of the input CSV, relative to the working directory.
DATA_PATH = "Data sets/Fish.csv"

# Integer code assigned to each species label so the column can be used as a
# numeric feature.
SPECIES_CODES = {
    "Bream": 0,
    "Roach": 1,
    "Whitefish": 2,
    "Parkki": 3,
    "Perch": 4,
    "Pike": 5,
    "Smelt": 6,
}


def encode_species(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *frame* with ``Species`` labels replaced by integers.

    The input frame is left untouched. The original script mutated the column
    through ``frame['Species'].replace(..., inplace=True)``, a chained
    in-place update that pandas warns about and that does not propagate to
    the frame under copy-on-write.

    Raises:
        ValueError: if the column holds a label missing from SPECIES_CODES
            (including missing values), since such rows could not be fed to
            the scaler/regressor anyway.
    """
    codes = frame["Species"].map(SPECIES_CODES)
    unmapped = codes.isna()
    if unmapped.any():
        unknown = frame.loc[unmapped, "Species"].unique().tolist()
        raise ValueError(f"unknown species labels: {unknown}")
    encoded = frame.copy()
    encoded["Species"] = codes.astype("int64")
    return encoded


def split_features_target(frame: pd.DataFrame):
    """Split *frame* positionally into ``(features, target)`` NumPy arrays.

    Column 1 is the target; columns 0 and 2-6 are the features.
    NOTE(review): presumably column 1 is ``Weight`` (the column plotted in
    ``main``) -- confirm against the CSV header.
    """
    features = frame.iloc[:, [0, 2, 3, 4, 5, 6]].values
    target = frame.iloc[:, 1].values
    return features, target


def train_and_evaluate(features, target, test_size=0.2, random_state=4):
    """Train a linear regression and score it on a held-out split.

    Returns:
        ``(model, y_test, y_pred, r2)`` where ``r2`` is the coefficient of
        determination of the predictions on the test split.
    """
    # scikit-learn is imported lazily so the pandas-only helpers above stay
    # importable without it.
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import r2_score
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )

    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    # Reuse the statistics learned from the training split. Re-fitting on the
    # test split (as the script originally did) scales the two sets
    # inconsistently and leaks test information into preprocessing.
    x_test = scaler.transform(x_test)

    model = LinearRegression()
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    return model, y_test, y_pred, r2_score(y_test, y_pred)


def main():
    """Load the dataset, print diagnostics, train the model and plot."""
    # Imported here so importing this module does not require a plotting
    # backend.
    import matplotlib.pyplot as plt

    startup = pd.read_csv(DATA_PATH)
    print(startup)
    print(startup.shape)
    print(startup.columns)
    print(startup['Species'].unique())
    # The per-column missing-value counts were computed but never displayed.
    print(startup.isnull().sum())
    print(startup.head())

    startup = encode_species(startup)
    print(startup.head())

    x, y = split_features_target(startup)
    _, y_test, y_pred, r2 = train_and_evaluate(x, y)
    print("pred :", y_pred[0])
    print("test :", y_test[0])
    # The value printed is R^2 (coefficient of determination), not a
    # classification accuracy; the label is kept for output compatibility.
    print("accuracy:", r2)

    startup.plot.scatter(x='Species', y='Weight')
    # Without show() the figure never appears when run as a plain script.
    plt.show()


if __name__ == "__main__":
    main()
Editor is loading...
Leave a Comment