Untitled
unknown
python
a year ago
1.5 kB
2
Indexable
Never
"""Train and evaluate a k-nearest-neighbours classifier on ``train.csv``.

Pipeline: load the CSV, drop unused columns, encode ``education_form`` as a
0/1 flag, fill missing ``result`` labels with 0, turn ``bdate`` into an age,
standardise the features, fit a 5-neighbour KNN model and print the first few
true/predicted labels plus the accuracy in percent.
"""
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# Columns removed before training.
DROPPED_COLUMNS = [
    'id', 'has_photo', 'city', 'has_mobile', 'followers_count',
    'graduation', 'langs', 'relation', 'occupation_name',
]

# Year that birth years are subtracted from to obtain an age.
REFERENCE_YEAR = 2023


def fill_ed(ed_f):
    """Return 1 when *ed_f* equals ``'Full-time'``, otherwise 0.

    Any other value (including a missing one) maps to 0.
    """
    return 1 if ed_f == 'Full-time' else 0


def bdate_to_age(bdate):
    """Convert a dot-separated birth date string into an age.

    The third dot-separated component is taken as the year (presumably the
    format is ``day.month.year`` -- confirm against the data).

    Returns ``REFERENCE_YEAR - year`` when *bdate* has exactly three
    components, and ``np.nan`` when it is not a string (e.g. a missing
    value) or has a different number of components.
    """
    # Missing cells arrive as float NaN, which has no ``.split``.
    if not isinstance(bdate, str):
        return np.nan
    parts = bdate.split('.')
    if len(parts) != 3:
        return np.nan
    return REFERENCE_YEAR - int(parts[2])


df = pd.read_csv('train.csv')
df.drop(DROPPED_COLUMNS, axis=1, inplace=True)

df['education_form'] = df['education_form'].apply(fill_ed)

# Rows without a label are treated as class 0.
df['result'] = df['result'].fillna(0)

# Derive the age, then impute unknown ages with the median age.
# ``median`` must be *called*: passing the bound method to ``fillna`` does
# not fill with the median value.
df['bdate'] = df['bdate'].apply(bdate_to_age)
df['bdate'] = df['bdate'].fillna(df['bdate'].median())

# NOTE(review): StandardScaler needs numeric input; this assumes every
# remaining feature column is numeric -- confirm against train.csv.
X = df.drop('result', axis=1)
y = df['result']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# Fit the scaler on the training split only and reuse it for the test split.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)
print(y_test[:5])
print(y_pred[:5])
print('ph', accuracy_score(y_test, y_pred) * 100)