# Untitled  (paste title; not part of the script)

import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.metrics import zero_one_loss

# Read the four arrays from disk: training inputs/labels and test inputs/labels
Xtr, Xtst, ytr, ytst = (
    np.load(path)
    for path in (
        "quiz2_datafiles/Xtr.npy",
        "quiz2_datafiles/Xtst.npy",
        "quiz2_datafiles/ytr.npy",
        "quiz2_datafiles/ytst.npy",
    )
)

# Experiment settings
n_samples = len(ytr)  # number of training labels available
epsilon = 0.4  # Difference threshold
delta = 0.05  # enters the bound below through the log(2 / delta) term

# Define a function to calculate Rademacher complexity
def rademacher_complexity(X: np.ndarray, n: int, n_draws: int = 100) -> float:
    """Monte Carlo estimate of the empirical Rademacher complexity of X.

    The function class is taken to be linear functions ``x -> <w, x>`` with
    ``||w||_1 <= 1``. For that class the supremum in the definition has a
    closed form (dual norm)::

        sup_w (1/n) * sum_i sigma_i * <w, x_i>  =  || (1/n) * sum_i sigma_i * x_i ||_inf

    so each draw reduces to the largest absolute per-feature correlation
    between the data and a random sign vector. The result is always >= 0.

    Parameters
    ----------
    X : array-like, first axis of length ``n``
        Samples, one per row. A 1-D input is treated as ``n`` samples with a
        single feature; extra trailing axes are flattened into features.
    n : int
        Number of samples; must equal ``X.shape[0]`` and be at least 1.
    n_draws : int, default 100
        Number of independent Rademacher (+/-1) sign vectors to average over.

    Returns
    -------
    float
        Mean over the draws of the per-draw supremum.

    Raises
    ------
    ValueError
        If ``n`` or ``n_draws`` is < 1, if ``X`` does not have exactly ``n``
        rows, or if ``X`` has no features.
    """
    if n < 1 or n_draws < 1:
        raise ValueError("n and n_draws must both be at least 1")

    X = np.asarray(X, dtype=float)
    if X.ndim < 1 or X.shape[0] != n:
        raise ValueError(
            f"X must have exactly n={n} rows, got shape {X.shape}"
        )
    X = X.reshape(n, -1)  # (n, d); also lifts 1-D input to a single feature
    if X.shape[1] == 0:
        raise ValueError("X must have at least one feature")

    # One row of +/-1 signs per Monte Carlo draw.
    sigma = np.random.choice([-1, 1], size=(n_draws, n))

    # Contract over the *sample* axis: (n_draws, n) @ (n, d) -> (n_draws, d).
    # (The signs pair with samples, not with features.)
    correlations = sigma @ X / n

    # Supremum over the L1 ball = L-infinity norm of each correlation vector.
    per_draw_sup = np.max(np.abs(correlations), axis=1)
    return float(np.mean(per_draw_sup))

# Search for the smallest training-set size (in steps of n_increment) at which
# the test error and the generalization bound differ by less than epsilon.
n_train = 10  # Start with a small number
n_increment = 10  # Increment by 10 samples each iteration

# Bound the search by the data actually available: once n_train exceeds
# n_samples the slices below stop growing, so an unbounded loop would spin
# forever re-evaluating the same training set.
while n_train <= n_samples:
    X_sub = Xtr[:n_train]
    y_sub = ytr[:n_train]

    # Perceptron.fit raises when the labels contain a single class, which can
    # happen for small prefixes; skip such sizes instead of crashing.
    if np.unique(y_sub).size >= 2:
        # Train the classifier on the first n_train samples
        bc = Perceptron()
        bc.fit(X_sub, y_sub)

        # Calculate test error (fraction of misclassified test samples)
        preds = bc.predict(Xtst)
        test_error = zero_one_loss(ytst, preds)

        # Calculate Rademacher complexity of the current training prefix
        rad_complexity = rademacher_complexity(X_sub, n_train)

        # Calculate generalization bound
        # NOTE(review): formula kept exactly as written; confirm it matches
        # the bound intended for this exercise.
        gen_bound = np.sqrt(2 * np.log(2 / delta) / n_train) + np.sqrt(2 * rad_complexity / n_train) + 1/n_train

        # Calculate the difference
        difference = np.abs(test_error - gen_bound)

        if difference < epsilon:
            print(f"Number of training samples needed: {n_train}")
            break

    n_train += n_increment
else:
    # Reached only when the loop ends without hitting `break`.
    print(
        f"No training set size up to {n_samples} samples brought the "
        f"difference below epsilon={epsilon}"
    )
# (stray "Editor is loading..." text from the paste site; not part of the script)