Untitled
unknown
plain_text
a year ago
1.2 kB
3
Indexable
import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.metrics import zero_one_loss
from sklearn.utils import resample

# Bootstrap check of a VC-style bound for a Perceptron classifier.
#
# The script expects Xtr, ytr, Xtst and ytst to be defined before it runs
# (they are not created here): the classifier is fit on bootstrap resamples
# of (Xtr, ytr) and scored on (Xtst, ytst).

# Define the delta value.
# NOTE(review): delta is never used below -- the bound formula does not
# include a confidence term. Confirm whether it should.
delta = 0.05

# Define the desired bound.
bound = 0.4

# VC-dimension taken as d + 1, where d is the number of feature columns.
d = Xtr.shape[1]
vc_dimension = d + 1

# Initialize variables.
n_samples = len(Xtr)
n_iterations = 1000  # Number of iterations for bootstrapping
sample_sizes = []

# The bound depends only on n_samples and vc_dimension, both fixed for the
# whole run, so it is computed once instead of once per iteration.
vc_bound = np.sqrt((8 * np.log(2 * n_samples / vc_dimension)) / n_samples)

for _ in range(n_iterations):
    # Bootstrap resample the training data (same size, with replacement).
    X_bootstrap, y_bootstrap = resample(
        Xtr, ytr, replace=True, n_samples=n_samples
    )

    # Train a binary classifier on the resample.
    bc = Perceptron()
    bc.fit(X_bootstrap, y_bootstrap)

    # Misclassification rate of this fit on the held-out data.
    preds = bc.predict(Xtst)
    test_error = zero_one_loss(ytst, preds)

    # Record the sample size whenever the condition is met.
    # NOTE(review): the only value ever appended is n_samples, so the
    # reported range below always has identical endpoints. Confirm whether
    # the sample size was meant to vary across iterations.
    if test_error - vc_bound < bound:
        sample_sizes.append(n_samples)

# Report the range of recorded sample sizes. min()/max() raise ValueError on
# an empty list, so the case where no iteration met the condition is handled
# explicitly.
if sample_sizes:
    min_sample_size = min(sample_sizes)
    max_sample_size = max(sample_sizes)
    print(f"About {min_sample_size}-{max_sample_size} samples.")
else:
    min_sample_size = max_sample_size = None
    print("No bootstrap iteration satisfied the bound condition.")
Editor is loading...