""" Naive Bayes Classifier Implementation from scratch To run the code structure the code in the following way: X be size: (num_training_examples, num_features) y be size: (num_classes, ) Where the classes are 0, 1, 2, etc. Then an example run looks like: NB = NaiveBayes(X, y) NB.fit(X) predictions = NB.predict(X) Programmed by Aladdin Persson * 2020-04-21 Initial coding """ import numpy as np class NaiveBayes: def __init__(self, X, y): self.num_examples, self.num_features = X.shape self.num_classes = len(np.unique(y)) self.eps = 1e-6 def fit(self, X): self.classes_mean = {} self.classes_variance = {} self.classes_prior = {} for c in range(self.num_classes): X_c = X[y == c] self.classes_mean[str(c)] = np.mean(X_c, axis=0) self.classes_variance[str(c)] = np.var(X_c, axis=0) self.classes_prior[str(c)] = X_c.shape[0] / X.shape[0] def predict(self, X): probs = np.zeros((self.num_examples, self.num_classes)) for c in range(self.num_classes): prior = self.classes_prior[str(c)] probs_c = self.density_function( X, self.classes_mean[str(c)], self.classes_variance[str(c)] ) probs[:, c] = probs_c + np.log(prior) return np.argmax(probs, 1) def density_function(self, x, mean, sigma): # Calculate probability from Gaussian density function const = -self.num_features / 2 * np.log(2 * np.pi) - 0.5 * np.sum( np.log(sigma + self.eps) ) probs = 0.5 * np.sum(np.power(x - mean, 2) / (sigma + self.eps), 1) return const - probs if __name__ == "__main__": X = np.loadtxt("example_data/data.txt", delimiter=",") y = np.loadtxt("example_data/targets.txt") - 1 NB = NaiveBayes(X, y) NB.fit(X) y_pred = NB.predict(X) print(f"Accuracy: {sum(y_pred==y)/X.shape[0]}")