Untitled

mail@pastecode.io avatar
unknown
python
23 days ago
2.3 kB
3
Indexable
Never
import numpy as np
import numpy.typing as npt

class NaiveBayesClassifier:
    """Gaussian Naive Bayes classifier fitted in explicit steps.

    Typical usage is to call ``get_descriptive`` and ``get_priors`` on the
    training data, then ``get_prediction`` on one sample at a time.

    Classes are identified by their position in the sorted unique labels of
    ``y_train`` (the order returned by ``np.unique``); every per-class array
    stored on the instance uses that ordering.
    """

    # Substitute used only where a stored class variance is exactly zero
    # (a feature that is constant within a class). Without it the density
    # would divide by zero and produce NaN. Positive variances are untouched.
    _ZERO_VARIANCE_FLOOR = 1e-9

    def get_descriptive(self, x_train, y_train) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
        """
        Step 1: Calculate feature descriptive by class

        Args:
            x_train: 2-D array-like of shape (num_samples, num_features).
            y_train: 1-D array-like of class labels, one per row of x_train.

        Returns:
            ``(means, variances)``, each of shape (num_classes, num_features).
            Variances are population variances (``np.var`` default, ddof=0).
            Both arrays are also stored as ``self.mean`` / ``self.variance``.
        """
        x_train = np.asarray(x_train, dtype=np.float64)
        y_train = np.asarray(y_train)

        unique_classes = np.unique(y_train)
        num_classes = len(unique_classes)
        num_features = x_train.shape[1]

        means = np.zeros((num_classes, num_features), dtype=np.float64)
        variances = np.zeros((num_classes, num_features), dtype=np.float64)

        for i, class_label in enumerate(unique_classes):
            class_data = x_train[y_train == class_label]
            means[i, :] = np.mean(class_data, axis=0)
            variances[i, :] = np.var(class_data, axis=0)

        self.mean = means
        self.variance = variances
        return self.mean, self.variance

    def get_priors(self, y_train: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """
        Step 2: Calculate prior probabilities

        Args:
            y_train: 1-D array of class labels.

        Returns:
            Relative frequency of each class, ordered like the sorted unique
            labels. Also stored as ``self.prior``.
        """
        _, counts = np.unique(y_train, return_counts=True)
        self.prior = counts / len(y_train)
        return self.prior

    def _safe_variance(self, class_idx) -> npt.NDArray[np.float64]:
        """Return the class's variances with exact zeros replaced by the floor."""
        variance = self.variance[class_idx]
        return np.where(variance > 0, variance, self._ZERO_VARIANCE_FLOOR)

    def _log_gaussian_density(self, class_idx, x) -> npt.NDArray[np.float64]:
        """Return the per-feature log of the Gaussian density for one class.

        Computed directly in log space so that samples far from the class
        mean do not underflow to ``log(0) = -inf``.
        """
        mean = self.mean[class_idx]
        variance = self._safe_variance(class_idx)
        return -0.5 * (np.log(2 * np.pi * variance) + (x - mean) ** 2 / variance)

    def gaussian_density(self, class_idx: np.int64, x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """
        Step 3: Implement the Gaussian Density

        Args:
            class_idx: Row index of the class in ``self.mean`` / ``self.variance``.
            x: Feature vector of length num_features.

        Returns:
            Per-feature Gaussian density of ``x`` under the class's fitted
            mean and variance. Requires ``get_descriptive`` to have been called.
        """
        mean = self.mean[class_idx]
        variance = self._safe_variance(class_idx)

        exponent = np.exp(-((x - mean) ** 2) / (2 * variance))
        density = (1 / np.sqrt(2 * np.pi * variance)) * exponent

        return density

    def get_prediction(self, x: npt.NDArray[np.float64]) -> int:
        """
        Step 4: Calculate the posterior probabilities and make prediction

        Args:
            x: A single sample's feature vector of length num_features.

        Returns:
            Index of the class with the highest log-posterior (log prior plus
            the sum of per-feature log-likelihoods). The index refers to the
            sorted unique training labels. Requires ``get_descriptive`` and
            ``get_priors`` to have been called.
        """
        x = np.asarray(x, dtype=np.float64)
        num_classes = len(self.prior)
        log_posteriors = np.zeros(num_classes, dtype=np.float64)

        for i in range(num_classes):
            log_prior = np.log(self.prior[i])
            log_likelihood = np.sum(self._log_gaussian_density(i, x))
            log_posteriors[i] = log_prior + log_likelihood

        # argmax yields a NumPy integer; convert to match the annotation.
        return int(np.argmax(log_posteriors))