# Untitled

# mail@pastecode.io avatar
# unknown
# python
# a year ago
# 1.2 kB
# 3
# Indexable
# Never
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Load the wine-quality CSV (the path is relative to the working directory).
data = pd.read_csv('../hanhnd/winequality-red.csv')

# Inspect the data: first rows, then column dtypes and non-null counts.
# `head` has to be called -- the bare attribute `data.head` does nothing --
# and the result is printed so it is visible when run as a plain script.
print(data.head())
data.info()

# Check the data: summary statistics and missing values per column.
print(data.describe())
# `isnull()` yields booleans, so comparing it with the string 'True' never
# matches; summing the boolean mask gives the real per-column null count.
print(data.isnull().sum())

# Input columns used as model features.
features = [
    'fixed acidity',
    'volatile acidity',
    'citric acid',
    'residual sugar',
    'chlorides',
    'free sulfur dioxide',
    'total sulfur dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
]

# Label column; kept as a one-element list so `y` is a single-column DataFrame.
target = ['quality']

# Select the feature matrix and the label column from the loaded frame.
X = data.loc[:, features]
y = data.loc[:, target]


# Split the data into train and test sets (25% held out for testing).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# Fit the model with the features as inputs and quality as the target.
# The arguments were previously swapped, which regressed the features on
# the label instead of learning to predict the label.
model = linear_model.LinearRegression()
model = model.fit(X_train, y_train)

# Predict from the held-out features and round the predictions to whole
# numbers. `np.round` replaces `np.round_`, which was removed in NumPy 2.0.
predicted_data = model.predict(X_test)
predicted_data = np.round(predicted_data)

# Score the rounded predictions against the true labels (y_true, y_pred).
print(mean_squared_error(y_test, predicted_data))


print(predicted_data)