import numpy as np
from sklearn.model_selection import train_test_split

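# The code below assumes a gradient_descent() helper and data arrays X and y
# that are not shown here. A minimal sketch of what they might look like:
# batch gradient descent for linear regression on the half-MSE cost, plus
# synthetic placeholder data whose first column is a bias term (the
# evaluation uses X_test.dot(theta) directly, so theta includes an intercept).
def gradient_descent(X, y, learning_rate=0.1, n_iterations=1000):
    """Batch gradient descent for linear regression (half-MSE cost)."""
    m, n = X.shape
    theta = np.zeros(n)
    cost_history = []
    for _ in range(n_iterations):
        errors = X.dot(theta) - y                    # residuals for current theta
        theta -= learning_rate * (1 / m) * X.T.dot(errors)
        cost_history.append((1 / (2 * m)) * np.sum(errors ** 2))
    return theta, cost_history

# Placeholder data: y is a noisy linear function of a single feature
rng = np.random.default_rng(0)
x_feature = rng.uniform(0, 2, size=200)
X = np.column_stack([np.ones_like(x_feature), x_feature])  # bias column + feature
y = 4.0 + 3.0 * x_feature + rng.normal(0, 0.5, size=200)
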
# Perform train-test split (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply gradient descent on the training set
theta, cost_history = gradient_descent(X_train, y_train)

# Evaluate performance on the test set with the same half-MSE cost
# that gradient descent minimises
predictions = X_test.dot(theta)
test_error = (1 / (2 * len(y_test))) * np.sum((predictions - y_test) ** 2)

print("Test Error:", test_error)

# For cross-validation, we'll use KFold from sklearn:

from sklearn.model_selection import KFold

# Define 5-fold cross-validation; shuffling avoids folds that mirror any
# ordering in the data, and random_state keeps the folds reproducible
kf = KFold(n_splits=5, shuffle=True, random_state=42)
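
# Track each fold's test error so the average cross-validation error
# can be reported after the loop
fold_errors = []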

# Loop over the splits
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Train the model on the training set
    theta, cost_history = gradient_descent(X_train, y_train)

    # Evaluate on the test set
    predictions = X_test.dot(theta)
    test_error = (1 / (2 * len(y_test))) * np.sum((predictions - y_test) ** 2)
    
    print("Fold Test Error:", test_error)