Mini-batch gradient descent for linear regression (NumPy)

import numpy as np


def mini_batch_gradient_descent(X, y, batch_size=32, learning_rate=0.01, iterations=1000):
    """Fit linear-regression parameters theta with mini-batch gradient descent.

    X: (num_samples, num_features) design matrix.
    y: (num_samples, 1) column vector of targets.
    Each iteration is one full epoch, i.e. one shuffled pass over the data.
    """
    num_samples, num_features = X.shape
    theta = np.zeros((num_features, 1))  # Initialize parameters to zeros
    cost_history = []

    for i in range(iterations):
        # Shuffle the data at the start of each epoch
        indices = np.random.permutation(num_samples)
        X_shuffled = X[indices]
        y_shuffled = y[indices]

        # Loop over mini-batches
        for start in range(0, num_samples, batch_size):
            end = min(start + batch_size, num_samples)
            X_batch = X_shuffled[start:end]
            y_batch = y_shuffled[start:end]

            # Compute predictions for the batch
            predictions = X_batch.dot(theta)

            # Compute the error
            error = predictions - y_batch

            # Compute the gradient, scaling by the actual batch length so the
            # final (possibly smaller) batch is weighted correctly
            gradients = (1 / len(X_batch)) * X_batch.T.dot(error)

            # Update the parameters
            theta -= learning_rate * gradients

        # Track the full-dataset cost once per epoch to monitor convergence
        cost = (1 / (2 * num_samples)) * np.sum((X.dot(theta) - y) ** 2)
        cost_history.append(cost)

    return theta, cost_history
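
A minimal usage sketch on synthetic data. The shapes, noise level, and hyperparameters below are illustrative assumptions, not part of the original snippet; the bias column of ones lets theta[0] learn the intercept.

import numpy as np

rng = np.random.default_rng(0)
num_samples = 200

# Synthetic targets: y = 4 + 3*x + Gaussian noise (illustrative values)
x = 2 * rng.random((num_samples, 1))
y = 4 + 3 * x + rng.normal(0.0, 0.5, (num_samples, 1))

# Prepend a bias column so the first component of theta is the intercept
X = np.hstack([np.ones((num_samples, 1)), x])

theta, cost_history = mini_batch_gradient_descent(
    X, y, batch_size=32, learning_rate=0.05, iterations=200
)
print(theta.ravel())      # should approach [4, 3]
print(cost_history[-1])   # final half-mean-squared-error on the full data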