Untitled
user_3298650655
python
4 years ago
1.5 kB
6
Indexable
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

# NOTE(review): `np`, `plt`, `distances` and `data` are used below but are not
# defined in this snippet; presumably they are provided by earlier code
# (e.g. a previous notebook cell) -- confirm before running standalone.


def get_frequent(x):
    """Return the most frequent value in *x*.

    Built on ``np.bincount``, so *x* must contain non-negative integers.
    On ties the smallest value wins (``argmax`` returns the first maximum).
    """
    return np.bincount(x).argmax()


class KNN:
    """Fixed-window, kernel-weighted nearest-neighbour classifier.

    Every training point whose distance to the query is at most ``h`` votes
    for its own label with weight ``K(distance)``; the label with the largest
    total weight is predicted.
    """

    def __init__(self, h):
        """Store the window radius *h* and select the ``'euclid'`` metric."""
        self.h = h
        self.metric = 'euclid'

    def fit(self, X, y):
        """Memorise training samples *X* with labels *y* and return ``self``.

        The distinct labels are cached in ``y_values`` (sorted by
        ``np.unique``) and define the order in which votes are tallied.
        """
        self._X = X
        self._y = y
        self.y_values = np.unique(y)
        return self

    def predict(self, X):
        """Predict a label for every sample in *X*.

        Returns:
            ``np.ndarray`` with one predicted label per sample of *X*.

        Raises:
            ValueError: from ``np.random.choice`` if the model was fitted on
                an empty training set (there is no label to fall back on).
        """
        y_pred = []
        for x1 in X:
            labels = []
            weights = []
            # Single pass over the training set: each distance is evaluated
            # exactly once and no sort is needed, because the weighted vote
            # does not depend on neighbour order.
            for x2, label in zip(self._X, self._y):
                dist = self.distance(x1, x2)
                if dist <= self.h:
                    labels.append(label)
                    weights.append(self.K(dist))

            if not labels:
                # No training point lies inside the window, so there is
                # nothing to vote with: pick a known class at random instead
                # of silently returning the first class every time.
                y_pred.append(np.random.choice(self.y_values))
                continue

            labels = np.array(labels)
            weights = np.array(weights)
            scores = [
                np.sum((labels == candidate) * weights)
                for candidate in self.y_values
            ]
            y_pred.append(self.y_values[np.argmax(scores)])
        return np.array(y_pred)

    def K(self, r):
        """Triangular kernel ``1 - |r / h|`` for a distance *r*.

        The value is not clamped, so it is negative for ``|r| > h``.
        For the degenerate window ``h == 0`` the formula would divide by
        zero; an exact match (``r == 0``) gets full weight 1 and any other
        distance gets weight 0.
        """
        if self.h == 0:
            return np.where(np.asarray(r) == 0, 1.0, 0.0)
        return 1 - np.abs(r / self.h)

    def distance(self, x1, x2):
        """Return the distance between *x1* and *x2* under ``self.metric``.

        Looks the metric up in the module-level ``distances`` mapping.
        """
        return distances[self.metric](x1, x2)


y = data['target']
x = data['data']
X_train, X_test, y_train, y_test = train_test_split(x, y)

# Sweep the window radius and record the F1 score for each setting.
values = [0, 1, 2, 3, 4]
f1_list = []
for h in values:
    knn = KNN(h)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    # NOTE(review): f1_score defaults to average='binary'; if the target has
    # more than two classes an explicit `average=` is required -- confirm.
    score = f1_score(y_test, y_pred)
    f1_list.append(score)

plt.plot(values, f1_list)
Editor is loading...