import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.metrics import zero_one_loss
from sklearn.utils import resample
# Bootstrap check of a VC-style criterion on the training-set size.
#
# Repeatedly resamples the training set with replacement, fits a Perceptron
# on each resample, measures its 0-1 error on the test set, and records the
# training-set size whenever (test error - VC term) stays below `bound`.
#
# Relies on Xtr, ytr, Xtst and ytst being defined before this point.

# Confidence parameter.
# NOTE(review): delta is never read below -- the VC term has no confidence
# component. Confirm whether the bound was meant to depend on it.
delta = 0.05
# Threshold that (test error - VC term) must stay below.
bound = 0.4
# VC dimension taken as d + 1, where d is the number of features.
d = Xtr.shape[1]
vc_dimension = d + 1
n_samples = len(Xtr)
n_iterations = 1000  # Number of bootstrap iterations
sample_sizes = []

# The VC term depends only on n_samples and vc_dimension, so it is computed
# once instead of on every bootstrap iteration.
vc_bound = np.sqrt((8 * np.log(2 * n_samples / vc_dimension)) / n_samples)

for _ in range(n_iterations):
    # Bootstrap-resample the training data (same size, with replacement).
    X_bootstrap, y_bootstrap = resample(
        Xtr, ytr, replace=True, n_samples=n_samples
    )
    # Train a binary classifier on the resample.
    bc = Perceptron()
    bc.fit(X_bootstrap, y_bootstrap)
    # Score it on the held-out test set.
    preds = bc.predict(Xtst)
    test_error = zero_one_loss(ytst, preds)
    # NOTE(review): only the constant n_samples is ever appended, so the
    # reported min and max are always identical. Verify whether the sample
    # size was meant to vary across iterations.
    if test_error - vc_bound < bound:
        sample_sizes.append(n_samples)

# Report the range of recorded sample sizes. The list is empty when no
# iteration met the condition (e.g. the VC term is NaN because
# 2 * n_samples < vc_dimension); min()/max() would raise ValueError then.
if sample_sizes:
    min_sample_size = min(sample_sizes)
    max_sample_size = max(sample_sizes)
    print(f"About {min_sample_size}-{max_sample_size} samples.")
else:
    min_sample_size = None
    max_sample_size = None
    print("No bootstrap iteration satisfied the bound condition.")