# Untitled

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score


def get_frequent(x):
  """Return the value that occurs most often in ``x``.

  ``x`` must hold non-negative integers, as required by ``np.bincount``.
  When several values share the highest count, the smallest one is returned
  because ``argmax`` reports the first maximum.
  """
  occurrences = np.bincount(x)
  return np.argmax(occurrences)

class KNN:
  """Fixed-window nearest-neighbour classifier with a triangular kernel.

  Every training point whose distance to a query is at most ``h`` votes for
  its own label with weight ``1 - distance / h``; the label with the largest
  total weight is predicted.

  Args:
    h: Radius of the window around each query point.
    metric: Key used to pick the distance function out of the ``distances``
      lookup table (which must be in scope where this class is defined).
      Defaults to ``'euclid'``.
  """

  def __init__(self, h, metric='euclid'):
    self.h = h
    self.metric = metric

  def fit(self, X, y):
    """Memorise the training samples ``X`` and their labels ``y``.

    Returns:
      ``self``, so calls can be chained.
    """
    self._X = X
    self._y = y

    # Distinct labels in sorted order; predict() scores them in this order.
    self.y_values = np.unique(y)

    return self

  def predict(self, X):
    """Predict a label for every sample in ``X``.

    Returns:
      A NumPy array with one predicted label per sample. If no training
      point lies inside the window, every class scores 0 and the first
      entry of ``self.y_values`` is returned for that sample.
    """
    y_pred = []

    for x1 in X:
      # Evaluate each distance exactly once and keep it next to its label.
      # Sorting only on the distance keeps the sort stable and never
      # compares labels with each other.
      scored = sorted(
          ((self.distance(x1, x2), label)
           for x2, label in zip(self._X, self._y)),
          key=lambda pair: pair[0]
      )

      if not scored:
        # No training data at all: pick one of the known classes at random.
        y_pred.append(np.random.choice(self.y_values))
        continue

      labels = []
      weights = []

      for dist, label in scored:
        # Points are ordered by distance, so the first one outside the
        # window ends the scan.
        if dist > self.h:
          break

        labels.append(label)
        weights.append(self.K(dist))

      labels = np.array(labels)
      weights = np.array(weights)

      # Total kernel weight collected by each class.
      scores = [np.sum((labels == cls) * weights) for cls in self.y_values]
      y_pred.append(self.y_values[np.argmax(scores)])

    return np.array(y_pred)

  def K(self, r):
    """Triangular kernel: weight ``1 - |r / h|`` for a point at distance ``r``.

    For ``h == 0`` the ratio is undefined, so the limiting case is used
    instead: weight 1 for an exact match (``r == 0``) and 0 otherwise.
    """
    if self.h == 0:
      return np.where(np.asarray(r) == 0, 1.0, 0.0)
    return (1 - np.abs(r / self.h))

  def distance(self, x1, x2):
    """Distance between ``x1`` and ``x2`` under the configured metric."""
    return distances[self.metric](x1, x2)


# Features and labels are read from `data`, which is defined outside this view.
x = data['data']
y = data['target']

# Unseeded split, so the partition (and the curve below) differs between runs.
X_train, X_test, y_train, y_test = train_test_split(x, y)


# Window radii to try, and the F1 score obtained with each of them.
values = list(range(5))
f1_list = []

for h in values:
  # fit() returns the classifier itself, so construction and fitting chain.
  knn = KNN(h).fit(X_train, y_train)
  y_pred = knn.predict(X_test)
  score = f1_score(y_test, y_pred)
  f1_list.append(score)

# F1 score as a function of the window radius.
plt.plot(values, f1_list)
# Editor is loading... (paste-site residue, not part of the program)