# Untitled
#
# avatar
# unknown
# plain_text
# a month ago
# 6.5 kB
# 2
# Indexable
import numpy as np

class LogisticRegression:
    """
    Binary logistic regression trained with batch gradient descent.

    Attributes:
    - weights (numpy.ndarray or None): Learned parameters; None until `fit`
      is called. When `fit_intercept` is True the intercept is stored at
      index 0, followed by one weight per feature.
    - bias (float or None): Intercept value after fitting (0.0 when
      `fit_intercept` is False); None until `fit` is called.
    """

    def __init__(self, learning_rate=0.01, num_iterations=1000, fit_intercept=True, verbose=False):
        """
        Initialize the Logistic Regression model.

        Parameters:
        - learning_rate (float): The step size for gradient descent updates.
        - num_iterations (int): Number of iterations for training.
        - fit_intercept (bool): Whether to include an intercept term.
        - verbose (bool): If True, prints loss every 100 iterations.
        """
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.fit_intercept = fit_intercept
        self.verbose = verbose
        self.weights = None  # Model weights (intercept first when fit_intercept=True)
        self.bias = None     # Model bias, populated by fit()

    def __add_intercept(self, X):
        """
        Add an intercept term to the feature matrix.

        Parameters:
        - X (numpy.ndarray): Feature matrix of shape (n_samples, n_features).

        Returns:
        - numpy.ndarray: Feature matrix with a leading column of ones.
        """
        intercept = np.ones((X.shape[0], 1))
        return np.hstack((intercept, X))

    def __prepare_features(self, X):
        """
        Convert the input to a 2D float matrix and add the intercept column
        when the model is configured with one.

        Parameters:
        - X (array-like): Features; a 1D input is treated as n_samples rows
          of a single feature.

        Returns:
        - numpy.ndarray: Matrix ready to be multiplied by `self.weights`.

        Raises:
        - ValueError: If X has more than two dimensions.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        if X.ndim != 2:
            raise ValueError(f"X must be 1D or 2D, got {X.ndim} dimensions.")
        if self.fit_intercept:
            X = self.__add_intercept(X)
        return X

    def __sigmoid(self, z):
        """
        Compute the sigmoid function in a numerically stable way.

        Parameters:
        - z (numpy.ndarray): Linear combination of inputs and weights.

        Returns:
        - numpy.ndarray: Sigmoid of input z.
        """
        z = np.asarray(z, dtype=float)
        # exp is only ever evaluated on non-positive values, so it cannot
        # overflow: 1/(1+e^-z) for z >= 0 and e^z/(1+e^z) for z < 0.
        e = np.exp(-np.abs(z))
        return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    def __loss(self, h, y):
        """
        Compute the loss using binary cross-entropy.

        Parameters:
        - h (numpy.ndarray): Predicted probabilities.
        - y (numpy.ndarray): True labels.

        Returns:
        - float: Loss value.
        """
        m = y.shape[0]
        # To avoid log(0), we clip h to [1e-15, 1 - 1e-15]
        h = np.clip(h, 1e-15, 1 - 1e-15)
        return (-1 / m) * (np.dot(y, np.log(h)) + np.dot((1 - y), np.log(1 - h)))

    def fit(self, X, y):
        """
        Fit the Logistic Regression model to the data using Gradient Descent.

        Parameters:
        - X (numpy.ndarray): Feature matrix of shape (n_samples, n_features).
        - y (numpy.ndarray): Binary labels of shape (n_samples,); a column
          vector of shape (n_samples, 1) is flattened.

        Raises:
        - ValueError: If the data is empty or X and y disagree on the number
          of samples.
        """
        X = self.__prepare_features(X)
        # Flatten so that (h - y) stays 1D instead of broadcasting to (n, n).
        y = np.asarray(y, dtype=float).ravel()
        if y.size == 0:
            raise ValueError("Cannot fit on an empty dataset.")
        if X.shape[0] != y.size:
            raise ValueError(
                f"X has {X.shape[0]} samples but y has {y.size} labels."
            )

        # Initialize weights
        self.weights = np.zeros(X.shape[1])

        for i in range(self.num_iterations):
            z = np.dot(X, self.weights)
            h = self.__sigmoid(z)
            gradient = np.dot(X.T, (h - y)) / y.size
            self.weights -= self.learning_rate * gradient

            if self.verbose and i % 100 == 0:
                # Loss is reported for the probabilities computed before this update.
                loss = self.__loss(h, y)
                print(f'Iteration {i}: loss {loss}')

        self.bias = float(self.weights[0]) if self.fit_intercept else 0.0

    def predict_proba(self, X):
        """
        Predict probability estimates for the input data.

        Parameters:
        - X (numpy.ndarray): Feature matrix of shape (n_samples, n_features).

        Returns:
        - numpy.ndarray: Predicted probabilities of shape (n_samples,).

        Raises:
        - ValueError: If the model has not been fitted yet.
        """
        if self.weights is None:
            raise ValueError("Model has not been fitted yet.")

        X = self.__prepare_features(X)
        return self.__sigmoid(np.dot(X, self.weights))

    def predict(self, X, threshold=0.5):
        """
        Predict binary labels for the input data.

        Parameters:
        - X (numpy.ndarray): Feature matrix of shape (n_samples, n_features).
        - threshold (float): Threshold for classifying probabilities.

        Returns:
        - numpy.ndarray: Predicted binary labels of shape (n_samples,).
        """
        return (self.predict_proba(X) >= threshold).astype(int)

    def score(self, X, y, threshold=0.5):
        """
        Calculate the accuracy of the model.

        Parameters:
        - X (numpy.ndarray): Feature matrix of shape (n_samples, n_features).
        - y (numpy.ndarray): True binary labels of shape (n_samples,); a
          column vector is flattened.
        - threshold (float): Threshold for classifying probabilities.

        Returns:
        - float: Accuracy score.
        """
        preds = self.predict(X, threshold)
        # Flatten so the comparison is element-wise rather than (n, n) broadcast.
        y = np.asarray(y).ravel()
        return (preds == y).mean()



import numpy as np

class LinearRegression:
    """
    Ordinary least-squares linear regression with an intercept term.

    After fitting, `coefficients` holds the intercept in its first row
    followed by one coefficient per feature.
    """

    def __init__(self):
        self.coefficients = None  # To store the model coefficients after fitting

    @staticmethod
    def _design_matrix(X):
        """
        Build the design matrix: X as a 2D float array with a leading column of ones.

        Parameters:
        X (array-like): 1D array of n_samples values of a single feature, or
            2D array of shape (n_samples, n_features)

        Returns:
        numpy.ndarray: Array of shape (n_samples, n_features + 1)
        """
        X = np.asarray(X, dtype=float)
        # Ensure X is a 2D array
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        ones = np.ones((X.shape[0], 1))
        return np.hstack((ones, X))

    def fit(self, X, y):
        """
        Fit the linear regression model to the data.

        Parameters:
        X (numpy.ndarray): 2D array of shape (n_samples, n_features)
        y (numpy.ndarray): 1D or 2D array of shape (n_samples,) or (n_samples, n_targets)

        Raises:
        ValueError: If X and y disagree on the number of samples.
        """
        X_b = self._design_matrix(X)
        y = np.asarray(y, dtype=float)
        if y.ndim == 0 or X_b.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples.")

        # Solve the least-squares problem directly rather than inverting XᵀX:
        # np.linalg.inv frequently does not raise on near-singular matrices
        # and returns unstable coefficients, while lstsq returns the
        # minimum-norm solution even when columns are collinear.
        self.coefficients, *_ = np.linalg.lstsq(X_b, y, rcond=None)

    def predict(self, X):
        """
        Predict using the linear regression model.

        Parameters:
        X (numpy.ndarray): 2D array of shape (n_samples, n_features)

        Returns:
        numpy.ndarray: Predicted values

        Raises:
        ValueError: If the model has not been fitted yet.
        """
        if self.coefficients is None:
            raise ValueError("Model has not been fitted yet.")

        # Compute predictions
        return self._design_matrix(X) @ self.coefficients

    def score(self, X, y):
        """
        Calculate the coefficient of determination R^2 of the prediction.

        For multi-target y the residual and total sums of squares are pooled
        over all targets, with the total computed around each target's own mean.

        Parameters:
        X (numpy.ndarray): 2D array of shape (n_samples, n_features)
        y (numpy.ndarray): 1D or 2D array of shape (n_samples,) or (n_samples, n_targets)

        Returns:
        float: R^2 score. When y is constant the score is 1.0 for a perfect
            fit and 0.0 otherwise, instead of dividing by zero.

        Raises:
        ValueError: If y cannot be aligned with the predictions.
        """
        y = np.asarray(y, dtype=float)
        y_pred = self.predict(X)
        if y.shape != y_pred.shape:
            # Align e.g. (n, 1) with (n,) explicitly; otherwise the
            # subtraction would silently broadcast to an (n, n) matrix.
            if y.size != y_pred.size:
                raise ValueError("y does not match the shape of the predictions.")
            y = y.reshape(y_pred.shape)

        ss_res = np.sum((y - y_pred) ** 2)
        ss_tot = np.sum((y - np.mean(y, axis=0)) ** 2)
        if ss_tot == 0:
            return 1.0 if np.isclose(ss_res, 0.0) else 0.0
        return float(1 - ss_res / ss_tot)

# Leave a Comment