Untitled

 avatar
unknown
plain_text
a year ago
1.2 kB
2
Indexable
import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.metrics import zero_one_loss
from sklearn.utils import resample

# Bootstrap check of a VC-style criterion on a Perceptron classifier.
#
# Expects Xtr, ytr (training data) and Xtst, ytst (test data) to already be
# defined. For each bootstrap resample of the training set a Perceptron is
# fitted, its 0-1 test error is measured, and the training-set size is recorded
# whenever `test_error - vc_bound < bound`.

# Define the delta value
# NOTE(review): delta is assigned but never used below -- the vc_bound formula
# has no confidence term. Confirm whether it was meant to enter the bound.
delta = 0.05

# Define the desired bound
bound = 0.4

# Calculate the VC-dimension (d + 1, where d is the number of features)
d = Xtr.shape[1]
vc_dimension = d + 1

# Initialize variables
n_samples = len(Xtr)
n_iterations = 1000  # Number of iterations for bootstrapping
sample_sizes = []

# The bound depends only on n_samples and vc_dimension, so it is computed once
# instead of on every bootstrap iteration.
# NOTE(review): this evaluates to NaN when 2 * n_samples < vc_dimension (log of
# a value below 1 is negative); the comparison in the loop is then always False.
vc_bound = np.sqrt((8 * np.log(2 * n_samples / vc_dimension)) / n_samples)

for _ in range(n_iterations):
    # Bootstrap resample the training data
    X_bootstrap, y_bootstrap = resample(Xtr, ytr, replace=True, n_samples=n_samples)

    # Train a binary classifier
    bc = Perceptron()
    bc.fit(X_bootstrap, y_bootstrap)

    # Measure the 0-1 loss of the fitted classifier on the held-out test set
    preds = bc.predict(Xtst)
    test_error = zero_one_loss(ytst, preds)

    # Check if the condition is met
    # NOTE(review): only n_samples is ever appended, so every recorded value is
    # identical and the reported range below is always "n_samples-n_samples".
    if test_error - vc_bound < bound:
        sample_sizes.append(n_samples)

# Get the range of sample sizes
if sample_sizes:
    min_sample_size = min(sample_sizes)
    max_sample_size = max(sample_sizes)
    print(f"About {min_sample_size}-{max_sample_size} samples.")
else:
    # min()/max() raise ValueError on an empty list; report the outcome
    # explicitly instead of crashing after all iterations have run.
    min_sample_size = max_sample_size = None
    print("No bootstrap iteration satisfied the bound; no sample size to report.")