Untitled

mail@pastecode.io avatar
unknown
python
a year ago
1.5 kB
2
Indexable
Never
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# Load the training set and discard, in the same expression, the columns
# that the rest of this script does not use.
df = pd.read_csv('train.csv').drop(
    columns=[
        'id', 'has_photo', 'city', 'has_mobile', 'followers_count',
        'graduation', 'langs', 'relation', 'occupation_name',
    ],
)


def fill_ed(ed_f):
    """Encode an education-form label as a binary flag.

    Returns 1 when ``ed_f`` is exactly the string ``'Full-time'`` and 0 for
    any other value, including missing ones.
    """
    return 1 if ed_f == 'Full-time' else 0


# Replace each education-form label with the 0/1 flag produced by fill_ed.
df['education_form'] = df['education_form'].map(fill_ed)


def fill_res(row):
    """Return ``row['result']``, substituting 0 when that value is null.

    ``row`` is anything indexable by the key ``'result'`` (for example a
    DataFrame row passed by ``DataFrame.apply(..., axis=1)``). Nullness is
    decided by ``pd.isnull``.
    """
    value = row['result']
    return 0 if pd.isnull(value) else value


# Treat a missing `result` as 0. Filling the column directly yields the same
# values as running fill_res on every row, without building a Series for each
# row of the whole frame just to inspect one cell.
df['result'] = df['result'].fillna(0)


def ch_bdate(row, current_year=2023):
    """Replace ``row['bdate']`` with an age in years, or NaN if unknown.

    The birth date is expected to be a dot-separated string whose third
    field is the birth year. The age is ``current_year`` minus that year.
    Anything else (a string with fewer or more than three fields, a
    non-numeric year, or a non-string value such as a missing cell) is
    replaced with ``np.nan`` so it can be imputed later.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a ``'bdate'``
            key. It is modified in place.
        current_year: Year the age is measured against. Defaults to 2023,
            the value that was previously hard-coded.

    Returns:
        The same ``row`` object, with ``'bdate'`` overwritten.
    """
    raw = row['bdate']
    age = np.nan
    # Missing cells are not strings (float NaN / None) and have no .split(),
    # so only attempt to parse real strings.
    if isinstance(raw, str):
        parts = raw.split('.')
        if len(parts) == 3:
            try:
                age = current_year - int(parts[2])
            except ValueError:
                # Non-numeric year field: treat it like a missing year.
                age = np.nan
    row['bdate'] = age
    return row

# Convert every row's birth date to an age; ch_bdate writes NaN where no
# year is available.
df = df.apply(ch_bdate, axis=1)
# Fill the missing ages with the median age. `median` has to be *called*:
# without the parentheses the bound method object itself, not a number, is
# handed to fillna.
df['bdate'] = df['bdate'].fillna(df['bdate'].median())

# Target is the `result` column; every remaining column is a feature.
y = df['result']
X = df.drop(columns='result')

# Hold out 25% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# Fit the scaler on the training split only, then apply that same scaling
# to both splits.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# k-nearest-neighbours classifier using 5 neighbours.
classifier = KNeighborsClassifier(n_neighbors=5)
# The call and its arguments must be one expression. Split across two lines,
# `classifier.fit` was a bare attribute access and `(X_train, y_train)` a
# discarded tuple, so the model was never trained before predict().
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)
# Show the first five true labels next to the first five predictions.
print(y_test[:5])
print(y_pred[:5])

# Accuracy on the held-out split, as a percentage.
print('ph', accuracy_score(y_test, y_pred) * 100)