import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.metrics import zero_one_loss

# Load the data
Xtr = np.load("quiz2_datafiles/Xtr.npy")
Xtst = np.load("quiz2_datafiles/Xtst.npy")
ytr = np.load("quiz2_datafiles/ytr.npy")
ytst = np.load("quiz2_datafiles/ytst.npy")

# Initialize variables
delta = 0.05    # Confidence parameter for the bound
epsilon = 0.4   # Difference threshold
n_samples = len(ytr)

# Monte Carlo estimate of the empirical Rademacher complexity for the
# class of unit-norm linear functions:
#   (1/n) * E_sigma[ sup_{||w|| <= 1} sum_i sigma_i <w, x_i> ]
#   = (1/n) * E_sigma[ ||sum_i sigma_i x_i||_2 ]
def rademacher_complexity(X, n, n_trials=100):
    sups = np.empty(n_trials)
    for t in range(n_trials):
        r = np.random.choice([-1, 1], n)  # Rademacher signs, one per sample
        sups[t] = np.linalg.norm(r @ X)   # sup over the unit ball is the norm
    return sups.mean() / n

# Initialize training set size
n_train = 10       # Start with a small number
n_increment = 10   # Increment by 10 samples each iteration

while n_train <= n_samples:
    # Train the classifier on the first n_train samples
    bc = Perceptron()
    bc.fit(Xtr[:n_train], ytr[:n_train])

    # Calculate test error
    preds = bc.predict(Xtst)
    test_error = zero_one_loss(ytst, preds)

    # Calculate Rademacher complexity of the current training subset
    rad_complexity = rademacher_complexity(Xtr[:n_train], n_train)

    # Calculate generalization bound
    gen_bound = (np.sqrt(2 * np.log(2 / delta) / n_train)
                 + np.sqrt(2 * rad_complexity / n_train)
                 + 1 / n_train)

    # Stop once the test error and the bound are within epsilon of each other
    difference = np.abs(test_error - gen_bound)
    if difference < epsilon:
        print(f"Number of training samples needed: {n_train}")
        break

    n_train += n_increment
else:
    print("Threshold not reached within the available training data.")
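
The quiz2_datafiles arrays are not included here, so as a smoke test the script can be run against synthetic data. A minimal sketch, assuming a linearly separable two-class dataset; the sizes (500 samples, 5 features) and the ground-truth direction are placeholder assumptions, while the file names match the paths the script loads:

import os
import numpy as np

# Hypothetical setup: fabricate a linearly separable two-class dataset
# so the script above can run without the original quiz2 data files.
rng = np.random.default_rng(0)
os.makedirs("quiz2_datafiles", exist_ok=True)

n, d = 500, 5                          # assumed sizes, not from the quiz
X = rng.normal(size=(2 * n, d))
w_true = rng.normal(size=d)            # assumed ground-truth direction
y = np.where(X @ w_true > 0, 1, -1)    # labels from a linear rule

np.save("quiz2_datafiles/Xtr.npy", X[:n])
np.save("quiz2_datafiles/ytr.npy", y[:n])
np.save("quiz2_datafiles/Xtst.npy", X[n:])
np.save("quiz2_datafiles/ytst.npy", y[n:])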