Untitled
unknown
python
a year ago
1.2 kB
3
Indexable
Never
# Fit a linear regression on the red-wine quality dataset and report the
# mean squared error of the rounded predictions on a held-out test split.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# NOTE(review): read with the default ',' separator; some distributions of
# this dataset are ';'-separated -- confirm against the actual file.
data = pd.read_csv('../hanhnd/winequality-red.csv')

# Inspect the data. The results are printed explicitly because a bare
# expression is discarded when this runs as a script.
print(data.head())
data.info()  # info() writes its summary to stdout itself

# Check the data: summary statistics and missing values per column.
print(data.describe())
print(data.isnull().sum())

# Feature columns (model inputs).
features = [
    'fixed acidity',
    'volatile acidity',
    'citric acid',
    'residual sugar',
    'chlorides',
    'free sulfur dioxide',
    'total sulfur dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
]

# Label column (model output). Kept as a one-element list, so `y` below is a
# single-column DataFrame rather than a Series.
target = ['quality']

# Select inputs and labels.
X = data[features]
y = data[target]

# Split into train and test sets (25% held out for testing). No random_state
# is passed, so the split -- and therefore the printed error -- varies
# between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# Train the model: features first, target second.
model = linear_model.LinearRegression()
model = model.fit(X_train, y_train)

# Predict quality for the held-out features, then round to the nearest whole
# number so predictions are on the same integer scale as the labels.
predicted_data = model.predict(X_test)
predicted_data = np.round(predicted_data)

# Score the rounded predictions against the true labels.
print(mean_squared_error(y_test, predicted_data))
print(predicted_data)