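# Paste-site metadata (not part of the program):
#   title: Untitled
#   author: user_3298650655
#   language: python
#   posted: 4 years ago
#   size: 1.5 kB
#   9
#   Indexable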
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
def get_frequent(x):
    """Return the value that occurs most often in ``x``.

    ``x`` is handed to ``np.bincount``, so it has to be a sequence of
    non-negative integers. When several values share the top count, the
    smallest of them is returned (``argmax`` reports the first maximum).
    """
    occurrences = np.bincount(x)
    return np.argmax(occurrences)
class KNN:
    """Fixed-radius, kernel-weighted nearest-neighbour classifier.

    For every query point, each training point whose distance is at most
    ``h`` votes for its own label with weight ``K(distance)``. The label
    with the largest total weight is predicted.

    Distances are obtained from ``distances[self.metric]``; ``distances``
    is a name -> callable mapping that this class does not define and that
    must be available at module level.
    """

    def __init__(self, h, metric='euclid'):
        """Store the window radius and the metric name.

        Args:
            h: Radius of the window; training points farther away than
                ``h`` do not vote.
            metric: Key used to pick the distance function out of the
                module-level ``distances`` mapping.
        """
        self.h = h
        self.metric = metric

    def fit(self, X, y):
        """Memorise the training set.

        Args:
            X: Iterable of training points.
            y: Iterable of labels, paired with ``X`` position by position.

        Returns:
            ``self``, so calls can be chained.
        """
        self._X = X
        self._y = y
        # Candidate labels, in the sorted order np.unique produces.
        self.y_values = np.unique(y)
        return self

    def predict(self, X):
        """Predict a label for every point in ``X``.

        If no training point lies inside the window of a query, the label
        is drawn uniformly at random from the fitted labels.

        Returns:
            A NumPy array with one predicted label per query point.
        """
        y_pred = []
        for x1 in X:
            # Measure every training point exactly once, then order the
            # (distance, label) pairs nearest-first. The sort is stable, so
            # equally distant points keep their training order.
            neighbours = sorted(
                (
                    (self.distance(x1, x2), label)
                    for x2, label in zip(self._X, self._y)
                ),
                key=lambda pair: pair[0],
            )

            labels = []
            weights = []
            for dist, label in neighbours:
                if dist > self.h:
                    # Sorted nearest-first: everything after this point is
                    # outside the window as well.
                    break
                labels.append(label)
                weights.append(self.K(dist))

            if not labels:
                # Nobody inside the window (this also covers an empty
                # training set): there is no evidence to vote with.
                y_pred.append(np.random.choice(self.y_values))
                continue

            labels = np.array(labels)
            weights = np.array(weights)
            # Total kernel weight collected by each candidate label.
            scores = [
                np.sum((labels == value) * weights) for value in self.y_values
            ]
            y_pred.append(self.y_values[np.argmax(scores)])
        return np.array(y_pred)

    def K(self, r):
        """Triangular kernel ``1 - |r / h|`` used to weight a neighbour.

        With ``h == 0`` the window has no width, so the division is skipped:
        an exact match (``r == 0``) gets weight 1 and anything else gets 0.
        """
        if self.h == 0:
            return np.where(np.asarray(r) == 0, 1.0, 0.0)
        return 1 - np.abs(r / self.h)

    def distance(self, x1, x2):
        """Return the distance between ``x1`` and ``x2`` under ``self.metric``."""
        return distances[self.metric](x1, x2)
# Split the dataset into features / labels and hold out a test part.
y, x = data['target'], data['data']
X_train, X_test, y_train, y_test = train_test_split(x, y)

# Sweep the window radius and record the F1 score reached for each value.
# NOTE(review): the sweep starts at h = 0, a zero-width window - confirm
# that this value is intended.
# NOTE(review): f1_score is called with its default averaging - confirm the
# target is binary.
values = list(range(5))
f1_list = []
for h in values:
    knn = KNN(h).fit(X_train, y_train)  # fit() returns the classifier itself
    y_pred = knn.predict(X_test)
    score = f1_score(y_test, y_pred)
    f1_list += [score]

# F1 score as a function of the window radius.
plt.plot(values, f1_list)
# Editor is loading...  (paste-site UI residue, not part of the program)