Untitled
unknown
plain_text
2 years ago
1.1 kB
4
Indexable
# Bootstrap experiment: repeatedly retrain a Perceptron on resampled training
# data and record the sample size whenever the test error, minus a confidence
# deviation term, falls below a target bound.
#
# Requires Xtr, ytr (training data) and Xtst, ytst (test data) to already be
# defined before this script runs; they are not created here.
import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.metrics import zero_one_loss
from sklearn.utils import resample

# Confidence parameter used inside the deviation term.
delta = 0.05

# Target value the adjusted test error is compared against.
bound = 0.4

n_samples = len(Xtr)
n_iterations = 1000  # Number of bootstrap rounds
sample_sizes = []

if n_samples == 0:
    # Fail early with a clear message instead of dividing by zero below or
    # failing deep inside scikit-learn.
    raise ValueError("Xtr is empty; cannot bootstrap from zero training samples.")

# Deviation term sqrt(ln(2/delta) / (2n)). It depends only on delta and
# n_samples, so it is computed once rather than on every iteration.
# NOTE(review): the name says "Rademacher", but this is a Hoeffding-style
# deviation term; no Rademacher complexity of the model class is estimated.
rademacher_bound = np.sqrt(np.log(2 / delta) / (2 * n_samples))

for _ in range(n_iterations):
    # Bootstrap-resample the training data (with replacement, same size).
    X_bootstrap, y_bootstrap = resample(
        Xtr, ytr, replace=True, n_samples=n_samples
    )

    # Train a fresh binary classifier on the resampled data.
    bc = Perceptron()
    bc.fit(X_bootstrap, y_bootstrap)

    # Misclassification rate on the held-out test set.
    preds = bc.predict(Xtst)
    test_error = zero_one_loss(ytst, preds)

    # NOTE(review): the deviation term is subtracted here; an upper confidence
    # bound on the error would normally add it. Left unchanged -- confirm the
    # intended inequality.
    if test_error - rademacher_bound < bound:
        sample_sizes.append(n_samples)

# NOTE(review): n_samples never changes across iterations, so every recorded
# value is identical and the reported "range" collapses to a single number.
if sample_sizes:
    min_sample_size = min(sample_sizes)
    max_sample_size = max(sample_sizes)
    print(f"About {min_sample_size}-{max_sample_size} samples.")
else:
    # min()/max() raise ValueError on an empty list; report the outcome instead.
    min_sample_size = max_sample_size = None
    print(
        f"No bootstrap run met the bound of {bound} "
        f"with {n_samples} training samples."
    )
Editor is loading...