# Untitled
# Exploratory script: load ``output.csv``, plot the ``ft`` / ``se`` columns,
# fit a logistic regression that predicts ``se`` from the remaining columns,
# and report accuracy plus two per-row rates computed on the held-out split.
#
# NOTE(review): this was reconstructed from a paste in which the whole script
# had been collapsed onto one line; block nesting was inferred from syntax.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score

df = pd.read_csv("output.csv")
# Bare expression: only renders when this runs as a notebook cell.
df.head(100)

# Distribution of each column, then their joint distribution.
sns.displot(df['ft'])
sns.displot(df['se'])
sns.jointplot(x=df['ft'], y=df['se'])

# Every column except the target ``se`` is used as a feature.
X = df.drop(['se'], axis=1)
y = df['se']

# Search over split seeds and remember the one with the highest test accuracy.
# NOTE(review): the seed is chosen using accuracy on the same held-out split
# that is reported below, so the printed accuracy is an optimistic estimate.
best = 0
score = 0
for seed in range(0, 1000, 10):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=seed
    )
    reg = LogisticRegression(solver='lbfgs', max_iter=1000)
    reg.fit(X_train, y_train)
    predictions = reg.predict(X_test)
    # Compute the accuracy once instead of once for the test and once to store.
    accuracy = accuracy_score(y_test, predictions)
    if accuracy > score:
        best = seed
        score = accuracy

# Re-create the winning split and refit, so that X_test / y_test / predictions
# below all refer to the split selected above.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=best
)
reg = LogisticRegression(solver='lbfgs', max_iter=1000)
reg.fit(X_train, y_train)
predictions = reg.predict(X_test)
print(predictions)

# Tally over every row of the test split. The original looped over a fixed
# range(0, 30) and divided by 30, which raises IndexError on a smaller split
# and ignores rows past 30 on a larger one.
n_test = len(X_test)
ft_values = X_test['ft'].to_numpy()
re_values = X_test['re'].to_numpy()

# Rows with re == 0 feed the rate printed as "實際勝率".
actual_hits = re_values == 0
# Rows where (prediction == 2 and ft == 0) or (prediction + 1 == ft) feed the
# rate printed as "預測勝率". The original if/elif counted a row at most once,
# which the logical OR reproduces.
# NOTE(review): the two tallies are treated as independent per-row checks; the
# paste lost the indentation that would show whether the second was nested
# under the first -- confirm against the original notebook.
predicted_hits = ((predictions == 2) & (ft_values == 0)) | (predictions + 1 == ft_values)

cnt_r = np.count_nonzero(actual_hits) / n_test
cnt_p = np.count_nonzero(predicted_hits) / n_test

print(f"最佳參數:{best} 正確率: {score}")
print(f"預測勝率:{cnt_p}, 實際勝率:{cnt_r}")

# Test features side by side with the true and predicted target values.
result_df = X_test.copy()
result_df['True'] = y_test.values
result_df['Predicted'] = predictions
print(result_df)
# Leave a Comment