# Estimate how many training samples a Perceptron needs before its test
# error falls within epsilon of a Rademacher-complexity generalization bound.
import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.metrics import zero_one_loss
# Load the data
# Train/test split saved as NumPy arrays: X* are feature matrices
# (one row per sample), y* are the corresponding label vectors.
Xtr = np.load("quiz2_datafiles/Xtr.npy")
Xtst = np.load("quiz2_datafiles/Xtst.npy")
ytr = np.load("quiz2_datafiles/ytr.npy")
ytst = np.load("quiz2_datafiles/ytst.npy")
# Initialize variables
delta = 0.05  # confidence parameter: bound holds with probability >= 1 - delta
epsilon = 0.4 # Difference threshold between test error and the bound
n_samples = len(ytr)  # total training samples available
# Define a function to calculate Rademacher complexity
def rademacher_complexity(X, n):
r = np.random.choice([-1, 1], n)
return np.mean(np.max(np.dot(X, r)))
# Grow the training set until the perceptron's measured test error comes
# within `epsilon` of the Rademacher-based generalization bound.
n_train = 10      # Start with a small number
n_increment = 10  # Increment by 10 samples each iteration

while True:
    # Train the classifier on the first n_train samples
    bc = Perceptron()
    bc.fit(Xtr[:n_train], ytr[:n_train])

    # Calculate test error: fraction of misclassified test points.
    preds = bc.predict(Xtst)
    test_error = zero_one_loss(ytst, preds)

    # Calculate Rademacher complexity of the current training subset.
    rad_complexity = rademacher_complexity(Xtr[:n_train], n_train)

    # Generalization bound: confidence term + complexity term + slack.
    gen_bound = np.sqrt(2 * np.log(2 / delta) / n_train) + np.sqrt(2 * rad_complexity / n_train) + 1 / n_train

    # Stop once the observed error is within epsilon of the bound.
    difference = np.abs(test_error - gen_bound)
    if difference < epsilon:
        print(f"Number of training samples needed: {n_train}")
        break

    # Termination guard (bug fix): slicing Xtr[:n_train] silently caps at
    # len(Xtr), so without this check the loop would spin forever once the
    # full training set is exhausted and the threshold is never reached.
    if n_train >= n_samples:
        print(f"Threshold not reached with all {n_samples} training samples.")
        break
    n_train += n_increment