import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
# Load the training table and, in the same expression, discard the columns
# that this script does not use as model features.
df = pd.read_csv('train.csv').drop(
    columns=[
        'id',
        'has_photo',
        'city',
        'has_mobile',
        'followers_count',
        'graduation',
        'langs',
        'relation',
        'occupation_name',
    ],
)
def fill_ed(ed_f):
    """Encode an education-form value as a binary flag.

    Returns 1 when ``ed_f`` equals the string ``'Full-time'`` and 0 for
    every other value (including missing values).
    """
    return 1 if ed_f == 'Full-time' else 0
# Replace each education-form label with its 0/1 encoding from fill_ed.
df['education_form'] = df['education_form'].map(fill_ed)
def fill_res(row):
    """Return the row's ``'result'`` value, substituting 0 when it is null.

    ``row`` is anything indexable by the key ``'result'`` (the script passes
    DataFrame rows). Nullness is decided by ``pd.isnull``.
    """
    outcome = row['result']
    return 0 if pd.isnull(outcome) else outcome
# Walk the frame row by row and overwrite 'result' with its null-free value.
df['result'] = df.apply(func=fill_res, axis='columns')
def ch_bdate(row, current_year=2023):
    """Replace a row's ``'bdate'`` string with an age in years.

    The birth date is expected to be a dot-separated string whose third
    field is the birth year (presumably ``day.month.year`` -- confirm against
    the data). The age is computed as ``current_year - year``.

    Args:
        row: Mutable mapping-like object with a ``'bdate'`` entry (the script
            passes DataFrame rows). It is modified in place.
        current_year: Year the age is measured against. Defaults to 2023,
            the value that used to be hard-coded here.

    Returns:
        The same ``row`` object, with ``row['bdate']`` set to the integer age,
        or to ``np.nan`` when the value is missing, is not a string, does not
        have exactly three dot-separated fields, or has a non-integer year.
    """
    raw = row['bdate']
    # Missing values arrive as NaN/None, which have no .split(); treat any
    # non-string as "no usable date" instead of raising AttributeError.
    parts = raw.split('.') if isinstance(raw, str) else []

    age = np.nan
    if len(parts) == 3:
        try:
            age = current_year - int(parts[2])
        except ValueError:
            # Third field is not an integer year: leave the age unknown.
            age = np.nan

    row['bdate'] = age
    return row
# Convert every row's birth-date string into an age (NaN where it cannot be
# derived).
df = df.apply(ch_bdate, axis=1)
# Impute the unknown ages with the median age. `median` has to be *called*:
# handing the bound method itself to fillna does not produce a numeric median.
df['bdate'] = df['bdate'].fillna(df['bdate'].median())

# Separate the feature columns from the target column.
X = df.drop('result', axis=1)
y = df['result']

# Hold out 25% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# Standardize the features. The scaler is fitted on the training split only
# and then reused on the test split, so test data does not influence scaling.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Train a 5-nearest-neighbours classifier. The call must be a single
# expression: `classifier.fit` followed by a tuple on the next line is two
# no-op statements and leaves the model unfitted.
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(X_train, y_train)

# Predict on the held-out rows and show the first five true/predicted labels
# followed by the accuracy as a percentage.
y_pred = classifier.predict(X_test)
print(y_test[:5])
print(y_pred[:5])
print('ph', accuracy_score(y_test, y_pred) * 100)