Initial commit

Aladdin Persson
2021-01-30 21:49:15 +01:00
commit 65b8c80495
432 changed files with 1290844 additions and 0 deletions

View File

@@ -0,0 +1,282 @@
"""
Author: Philip Andreadis
e-mail: philip_andreadis@hotmail.com
Implementation of a Decision Tree model from scratch.
The metric used to choose a split is the Gini index, which is evaluated for every single value of every feature
in order to find the best split at each step. This leaves room for performance improvements, as the
process is O(n^2) and could be reduced to linear complexity.
Parameters of the model:
max_depth (int): Maximum depth of the decision tree
min_node_size (int): Minimum number of instances a node can have. If a node has this many instances or fewer, it becomes a terminal node
Both are up to the user to set.
The input dataset to the train() function must be a numpy array containing both feature values and labels (labels in the last column).
"""
from collections import Counter
import numpy as np
class DecisionTree:
def __init__(self, max_depth, min_node_size):
self.max_depth = max_depth
self.min_node_size = min_node_size
self.final_tree = {}
"""
    This function calculates the Gini index of a split of the dataset.
    First the Gini score is calculated for each child node; the resulting Gini index is the weighted sum of gini_left and gini_right, weighted by each child's share of the instances.
Parameters:
child_nodes (list of np arrays): The two groups of instances resulting from the split
Returns:
float:Gini index of the split
"""
def calculate_gini(self, child_nodes):
n = 0
# Calculate number of all instances of the parent node
for node in child_nodes:
n = n + len(node)
gini = 0
# Calculate gini index for each child node
for node in child_nodes:
m = len(node)
# Avoid division by zero if a child node is empty
if m == 0:
continue
# Create a list with each instance's class value
y = []
for row in node:
y.append(row[-1])
# Count the frequency for each class value
freq = Counter(y).values()
node_gini = 1
for i in freq:
node_gini = node_gini - (i / m) ** 2
gini = gini + (m / n) * node_gini
return gini
"""
    This function splits the dataset on a certain value of a feature
Parameters:
feature_index (int): Index of selected feature
threshold : Value of the feature split point
Returns:
np.array: Two new groups of split instances
"""
def apply_split(self, feature_index, threshold, data):
instances = data.tolist()
left_child = []
right_child = []
for row in instances:
if row[feature_index] < threshold:
left_child.append(row)
else:
right_child.append(row)
left_child = np.array(left_child)
right_child = np.array(right_child)
return left_child, right_child
"""
    This function finds the best split of the dataset at each iteration of the algorithm by evaluating
all possible splits and applying the one with the minimum Gini index.
Parameters:
data: Dataset
Returns node (dict): Dictionary with the index of the splitting feature and its value and the two child nodes
"""
def find_best_split(self, data):
num_of_features = len(data[0]) - 1
gini_score = 1000
f_index = 0
f_value = 0
# Iterate through each feature and find minimum gini score
for column in range(num_of_features):
for row in data:
value = row[column]
l, r = self.apply_split(column, value, data)
children = [l, r]
score = self.calculate_gini(children)
# print("Candidate split feature X{} < {} with Gini score {}".format(column,value,score))
if score < gini_score:
gini_score = score
f_index = column
f_value = value
child_nodes = children
# print("Chosen feature is {} and its value is {} with gini index {}".format(f_index,f_value,gini_score))
node = {"feature": f_index, "value": f_value, "children": child_nodes}
return node
"""
This function calculates the most frequent class value in a group of instances
Parameters:
node: Group of instances
Returns : Most common class value
"""
def calc_class(self, node):
# Create a list with each instance's class value
y = []
for row in node:
y.append(row[-1])
# Find most common class value
        occurrence_count = Counter(y)
        return occurrence_count.most_common(1)[0][0]
"""
    Recursive function that builds the decision tree by splitting every child node until it becomes terminal.
    A node is terminated when: (i) the maximum tree depth is reached, (ii) the node has too few instances, or (iii) a child node is empty.
Parameters:
node: Group of instances
depth (int): Current depth of the tree
"""
def recursive_split(self, node, depth):
l, r = node["children"]
del node["children"]
if l.size == 0:
c_value = self.calc_class(r)
node["left"] = node["right"] = {"class_value": c_value, "depth": depth}
return
elif r.size == 0:
c_value = self.calc_class(l)
node["left"] = node["right"] = {"class_value": c_value, "depth": depth}
return
# Check if tree has reached max depth
if depth >= self.max_depth:
# Terminate left child node
c_value = self.calc_class(l)
node["left"] = {"class_value": c_value, "depth": depth}
# Terminate right child node
c_value = self.calc_class(r)
node["right"] = {"class_value": c_value, "depth": depth}
return
# process left child
if len(l) <= self.min_node_size:
c_value = self.calc_class(l)
node["left"] = {"class_value": c_value, "depth": depth}
else:
node["left"] = self.find_best_split(l)
self.recursive_split(node["left"], depth + 1)
# process right child
if len(r) <= self.min_node_size:
c_value = self.calc_class(r)
node["right"] = {"class_value": c_value, "depth": depth}
else:
node["right"] = self.find_best_split(r)
self.recursive_split(node["right"], depth + 1)
"""
Apply the recursive split algorithm on the data in order to build the decision tree
Parameters:
X (np.array): Training data
Returns tree (dict): The decision tree in the form of a dictionary.
"""
def train(self, X):
# Create initial node
tree = self.find_best_split(X)
# Generate the rest of the tree via recursion
self.recursive_split(tree, 1)
self.final_tree = tree
return tree
"""
Prints out the decision tree.
Parameters:
tree (dict): Decision tree
"""
def print_dt(self, tree, depth=0):
if "feature" in tree:
print(
"\nSPLIT NODE: feature #{} < {} depth:{}\n".format(
tree["feature"], tree["value"], depth
)
)
self.print_dt(tree["left"], depth + 1)
self.print_dt(tree["right"], depth + 1)
else:
print(
"TERMINAL NODE: class value:{} depth:{}".format(
tree["class_value"], tree["depth"]
)
)
"""
    This function outputs the class value of a given instance based on the previously built decision tree.
    Parameters:
    tree (dict): Decision tree
    instance (1d np.array): Single instance of data
    Returns (float): Predicted class value of the given instance
"""
def predict_single(self, tree, instance):
if not tree:
print("ERROR: Please train the decision tree first")
return -1
if "feature" in tree:
if instance[tree["feature"]] < tree["value"]:
return self.predict_single(tree["left"], instance)
else:
return self.predict_single(tree["right"], instance)
else:
return tree["class_value"]
"""
This function outputs the class value for each instance of the given dataset.
Parameters:
X (np.array): Dataset with labels
Returns y (np.array): array with the predicted class values of the dataset
"""
def predict(self, X):
y_predict = []
for row in X:
y_predict.append(self.predict_single(self.final_tree, row))
return np.array(y_predict)
if __name__ == "__main__":
# # test dataset
# X = np.array([[1, 1,0], [3, 1, 0], [1, 4, 0], [2, 4, 1], [3, 3, 1], [5, 1, 1]])
# y = np.array([0, 0, 0, 1, 1, 1])
train_data = np.loadtxt("example_data/data.txt", delimiter=",")
train_y = np.loadtxt("example_data/targets.txt")
# Build tree
dt = DecisionTree(5, 1)
tree = dt.train(train_data)
y_pred = dt.predict(train_data)
print(f"Accuracy: {sum(y_pred == train_y) / train_y.shape[0]}")
# Print out the decision tree
# dt.print_dt(tree)

View File

@@ -0,0 +1,90 @@
1.1107, -2.1079, 1
-0.5498, 0.0943, 1
-0.0382, 1.8829,1
0.0555, -0.6139,1
0.5870, -1.2067,1
0.5453, 0.2509,1
-0.3927, -0.6220,1
-1.1905, -1.8785,1
-0.4240, 0.7772,1
-0.7139, 1.5846,1
-0.8883, 2.1408,1
-0.6922, 0.0993,1
1.4350, 1.2334,1
-0.7576, 0.7386,1
-1.1144, -1.7059,1
0.6612, -1.7296,1
-2.1381, -0.0600,1
1.3857, 1.2178,1
-1.4951, 0.0373,1
0.8029, 0.9739,1
1.5607, 1.5862,1
0.8563, -1.4245,1
0.0397, -1.3799,1
1.2331, 1.7421,1
-2.0015, 0.8355,1
-0.3428, -0.4780,1
-0.8891, 1.2634,1
0.3832, -0.1189,1
0.4172, 1.0132,1
-0.8695, -0.7947,1
2.9737, 3.6438,2
3.7680, 1.8649,2
0.1166, 0.9435,2
0.6896, 3.9160,2
1.2234, 2.9899,2
2.3009, 0.4150,2
3.7693, 3.8027,2
1.9450, 3.4208,2
0.9290, 3.3611,2
5.0027, 2.7870,2
1.0101, 1.8737,2
2.0751, 2.2628,2
1.9113, 3.6777,2
2.3127, 3.9130,2
1.9392, 2.3976,2
3.1218, 2.5495,2
1.7032, 1.1509,2
0.4212, 3.5322,2
2.7686, 0.9402,2
2.1696, 2.9285,2
0.3380, 2.0947,2
3.6886, 0.4054,2
2.6315, 3.1962,2
-0.5332, 3.1421,2
0.3380, 3.0801,2
1.4030, 1.1841,2
2.8739, 2.7777,2
1.1254, 3.2404,2
0.0988, 1.9522,2
0.3688, 2.8904,2
1.4758, -1.6387,3
1.9289, -1.8191,3
2.5741, -1.3213,3
2.1917, -1.2852,3
0.8358, -2.3349,3
2.6863, -1.8834,3
3.1102, -0.4854,3
3.7073, -0.6466,3
3.6394, -0.4097,3
0.5365, -3.6555,3
2.9295, -0.3819,3
0.8168, -3.1133,3
1.3432, -1.7717,3
1.1039, -2.2261,3
1.3754, -2.2236,3
0.6757, -2.5379,3
-0.2029, -3.8420,3
2.4210, -1.9788,3
1.0335, -2.6042,3
0.9638, -2.9449,3
-0.8198, -5.4449,3
1.9552, -1.5530,3
0.3505, -3.1887,3
2.4943, -1.8116,3
1.9761, -1.0664,3
0.5994, -3.0513,3
2.2076, -1.6728,3
1.9941, -1.8826,3
1.7487, -2.9644,3
1.4160, -2.4234,3

View File

@@ -0,0 +1,90 @@
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3

View File

@@ -0,0 +1,97 @@
"""
From-scratch implementation of K-means clustering, an unsupervised
clustering method that works by iteratively assigning points to the
nearest centroid and moving each centroid to the center of its newly
formed cluster.
Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-05-28 Initial coding
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
class KMeansClustering:
def __init__(self, X, num_clusters):
self.K = num_clusters
self.max_iterations = 100
self.plot_figure = True
self.num_examples = X.shape[0]
self.num_features = X.shape[1]
def initialize_random_centroids(self, X):
centroids = np.zeros((self.K, self.num_features))
for k in range(self.K):
centroid = X[np.random.choice(range(self.num_examples))]
centroids[k] = centroid
return centroids
def create_clusters(self, X, centroids):
# Will contain a list of the points that are associated with that specific cluster
clusters = [[] for _ in range(self.K)]
# Loop through each point and check which is the closest cluster
for point_idx, point in enumerate(X):
closest_centroid = np.argmin(
np.sqrt(np.sum((point - centroids) ** 2, axis=1))
)
clusters[closest_centroid].append(point_idx)
return clusters
def calculate_new_centroids(self, clusters, X):
centroids = np.zeros((self.K, self.num_features))
for idx, cluster in enumerate(clusters):
new_centroid = np.mean(X[cluster], axis=0)
centroids[idx] = new_centroid
return centroids
def predict_cluster(self, clusters, X):
y_pred = np.zeros(self.num_examples)
for cluster_idx, cluster in enumerate(clusters):
for sample_idx in cluster:
y_pred[sample_idx] = cluster_idx
return y_pred
def plot_fig(self, X, y):
plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.Spectral)
plt.show()
def fit(self, X):
centroids = self.initialize_random_centroids(X)
for it in range(self.max_iterations):
clusters = self.create_clusters(X, centroids)
previous_centroids = centroids
centroids = self.calculate_new_centroids(clusters, X)
diff = centroids - previous_centroids
if not diff.any():
print("Termination criterion satisfied")
break
# Get label predictions
y_pred = self.predict_cluster(clusters, X)
if self.plot_figure:
self.plot_fig(X, y_pred)
return y_pred
if __name__ == "__main__":
np.random.seed(10)
num_clusters = 3
X, _ = make_blobs(n_samples=1000, n_features=2, centers=num_clusters)
Kmeans = KMeansClustering(X, num_clusters)
y_pred = Kmeans.fit(X)
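    # Illustrative follow-up (not part of the original script): report how many
    # points were assigned to each of the K clusters.
    labels, counts = np.unique(y_pred, return_counts=True)
    print(dict(zip(labels.astype(int), counts)))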

Binary file not shown.

View File

@@ -0,0 +1,90 @@
1.1107, -2.1079
-0.5498, 0.0943
-0.0382, 1.8829
0.0555, -0.6139
0.5870, -1.2067
0.5453, 0.2509
-0.3927, -0.6220
-1.1905, -1.8785
-0.4240, 0.7772
-0.7139, 1.5846
-0.8883, 2.1408
-0.6922, 0.0993
1.4350, 1.2334
-0.7576, 0.7386
-1.1144, -1.7059
0.6612, -1.7296
-2.1381, -0.0600
1.3857, 1.2178
-1.4951, 0.0373
0.8029, 0.9739
1.5607, 1.5862
0.8563, -1.4245
0.0397, -1.3799
1.2331, 1.7421
-2.0015, 0.8355
-0.3428, -0.4780
-0.8891, 1.2634
0.3832, -0.1189
0.4172, 1.0132
-0.8695, -0.7947
2.9737, 3.6438
3.7680, 1.8649
0.1166, 0.9435
0.6896, 3.9160
1.2234, 2.9899
2.3009, 0.4150
3.7693, 3.8027
1.9450, 3.4208
0.9290, 3.3611
5.0027, 2.7870
1.0101, 1.8737
2.0751, 2.2628
1.9113, 3.6777
2.3127, 3.9130
1.9392, 2.3976
3.1218, 2.5495
1.7032, 1.1509
0.4212, 3.5322
2.7686, 0.9402
2.1696, 2.9285
0.3380, 2.0947
3.6886, 0.4054
2.6315, 3.1962
-0.5332, 3.1421
0.3380, 3.0801
1.4030, 1.1841
2.8739, 2.7777
1.1254, 3.2404
0.0988, 1.9522
0.3688, 2.8904
1.4758, -1.6387
1.9289, -1.8191
2.5741, -1.3213
2.1917, -1.2852
0.8358, -2.3349
2.6863, -1.8834
3.1102, -0.4854
3.7073, -0.6466
3.6394, -0.4097
0.5365, -3.6555
2.9295, -0.3819
0.8168, -3.1133
1.3432, -1.7717
1.1039, -2.2261
1.3754, -2.2236
0.6757, -2.5379
-0.2029, -3.8420
2.4210, -1.9788
1.0335, -2.6042
0.9638, -2.9449
-0.8198, -5.4449
1.9552, -1.5530
0.3505, -3.1887
2.4943, -1.8116
1.9761, -1.0664
0.5994, -3.0513
2.2076, -1.6728
1.9941, -1.8826
1.7487, -2.9644
1.4160, -2.4234

View File

@@ -0,0 +1,90 @@
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3

110
ML/algorithms/knn/knn.py Normal file
View File

@@ -0,0 +1,110 @@
"""
Implementation of K-nearest neighbor (KNN) from scratch,
where you can use either a two-loop (inefficient), one-loop (better),
or heavily vectorized zero-loop implementation.
Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-04-24 Initial coding
"""
import numpy as np
class KNearestNeighbor:
def __init__(self, k):
self.k = k
self.eps = 1e-8
def train(self, X, y):
self.X_train = X
self.y_train = y
def predict(self, X_test, num_loops=0):
if num_loops == 0:
distances = self.compute_distance_vectorized(X_test)
elif num_loops == 1:
distances = self.compute_distance_one_loop(X_test)
else:
distances = self.compute_distance_two_loops(X_test)
return self.predict_labels(distances)
def compute_distance_two_loops(self, X_test):
"""
Inefficient naive implementation, use only
as a way of understanding what kNN is doing
"""
num_test = X_test.shape[0]
num_train = self.X_train.shape[0]
distances = np.zeros((num_test, num_train))
for i in range(num_test):
for j in range(num_train):
# (Taking sqrt is not necessary: min distance won't change since sqrt is monotone)
distances[i, j] = np.sqrt(
self.eps + np.sum((X_test[i, :] - self.X_train[j, :]) ** 2)
)
return distances
def compute_distance_one_loop(self, X_test):
"""
Much better than two-loops but not as fast as fully vectorized version.
Utilize Numpy broadcasting in X_train - X_test[i,:]
"""
num_test = X_test.shape[0]
num_train = self.X_train.shape[0]
distances = np.zeros((num_test, num_train))
for i in range(num_test):
# (Taking sqrt is not necessary: min distance won't change since sqrt is monotone)
distances[i, :] = np.sqrt(
self.eps + np.sum((self.X_train - X_test[i, :]) ** 2, axis=1)
)
return distances
def compute_distance_vectorized(self, X_test):
"""
        This can be tricky to understand; we utilize heavy
        vectorization as well as numpy broadcasting.
        Idea: for two vectors a, b (two examples) we can
        compute (a-b)^2 = a^2 - 2*(a . b) + b^2. Expanding on this
        and doing it for every pair of vectors leads to the
        heavily vectorized formula for all examples at the same time.
"""
X_test_squared = np.sum(X_test ** 2, axis=1, keepdims=True)
X_train_squared = np.sum(self.X_train ** 2, axis=1, keepdims=True)
two_X_test_X_train = np.dot(X_test, self.X_train.T)
# (Taking sqrt is not necessary: min distance won't change since sqrt is monotone)
return np.sqrt(
self.eps + X_test_squared - 2 * two_X_test_X_train + X_train_squared.T
)
def predict_labels(self, distances):
num_test = distances.shape[0]
y_pred = np.zeros(num_test)
for i in range(num_test):
y_indices = np.argsort(distances[i, :])
k_closest_classes = self.y_train[y_indices[: self.k]].astype(int)
y_pred[i] = np.argmax(np.bincount(k_closest_classes))
return y_pred
if __name__ == "__main__":
X = np.loadtxt("example_data/data.txt", delimiter=",")
y = np.loadtxt("example_data/targets.txt")
X = np.array([[1, 1], [3, 1], [1, 4], [2, 4], [3, 3], [5, 1]])
y = np.array([0, 0, 0, 1, 1, 1])
KNN = KNearestNeighbor(k=1)
KNN.train(X, y)
y_pred = KNN.predict(X, num_loops=0)
print(f"Accuracy: {sum(y_pred == y) / y.shape[0]}")

View File

@@ -0,0 +1,62 @@
"""
Implementation of Linear Regression using Gradient Descent.
Let m = #training examples and n = #features. Sizes differ
a little from the blog notation. It takes as input the following:
y is R^(1 x m), X is R^(n x m), w is R^(n x 1)
Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-04-03 Initial coding
* 2020-04-25 Updated comments, and small changes in code
"""
import numpy as np
class LinearRegression:
def __init__(self, print_cost=False):
self.learning_rate = 0.01
self.total_iterations = 1000
self.print_cost = print_cost
def y_hat(self, X, w):
return np.dot(w.T, X)
def cost(self, yhat, y):
C = 1 / self.m * np.sum(np.power(yhat - y, 2))
return C
def gradient_descent(self, w, X, y, yhat):
dCdW = 2 / self.m * np.dot(X, (yhat - y).T)
w = w - self.learning_rate * dCdW
return w
def main(self, X, y):
        # Add a row of ones to X for the intercept term
ones = np.ones((1, X.shape[1]))
X = np.append(ones, X, axis=0)
self.m = X.shape[1]
self.n = X.shape[0]
w = np.zeros((self.n, 1))
for it in range(self.total_iterations + 1):
yhat = self.y_hat(X, w)
cost = self.cost(yhat, y)
if it % 2000 == 0 and self.print_cost:
print(f"Cost at iteration {it} is {cost}")
w = self.gradient_descent(w, X, y, yhat)
return w
if __name__ == "__main__":
X = np.random.rand(1, 500)
y = 3 * X + 5 + np.random.randn(1, 500) * 0.1
regression = LinearRegression()
w = regression.main(X, y)
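    # Illustrative follow-up (not part of the original script): with the synthetic
    # data above (y = 3x + 5 + noise), the learned weights should drift toward an
    # intercept near 5 and a slope near 3, given enough iterations.
    print(f"Learned intercept: {w[0, 0]:.3f}, learned slope: {w[1, 0]:.3f}")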

View File

@@ -0,0 +1,28 @@
"""
Implementation of Linear Regression using the Normal Equation.
Let m = #training examples and n = #features; the
input shapes are y in R^(m x 1), X in R^(m x n), w in R^(n x 1).
Using these shapes, the normal equation implementation is
exactly as the derived formula :)
Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-04-25 Initial coding
"""
import numpy as np
def linear_regression_normal_equation(X, y):
ones = np.ones((X.shape[0], 1))
X = np.append(ones, X, axis=1)
W = np.dot(np.linalg.pinv(np.dot(X.T, X)), np.dot(X.T, y))
return W
if __name__ == "__main__":
# Run a small test example: y = 5x (approximately)
m, n = 500, 1
X = np.random.rand(m, n)
y = 5 * X + np.random.randn(m, n) * 0.1
W = linear_regression_normal_equation(X, y)

View File

@@ -0,0 +1,70 @@
"""
From scratch implementation of Logistic Regression
Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-05-24 Initial coding
"""
import numpy as np
from sklearn.datasets import make_blobs
class LogisticRegression:
def __init__(self, X, learning_rate=0.1, num_iters=10000):
self.lr = learning_rate
self.num_iters = num_iters
# m for #training_examples, n for #features
self.m, self.n = X.shape
def train(self, X, y):
# init weights
self.weights = np.zeros((self.n, 1))
self.bias = 0
for it in range(self.num_iters + 1):
# calculate hypothesis
y_predict = self.sigmoid(np.dot(X, self.weights) + self.bias)
# calculate cost
cost = (
-1
/ self.m
* np.sum(y * np.log(y_predict) + (1 - y) * np.log(1 - y_predict))
)
# back prop / gradient calculations
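            # For reference: with the binary cross-entropy loss, dJ/dw = (1/m) * X^T (y_hat - y)
            # and dJ/db = (1/m) * sum(y_hat - y), which is what the two lines below compute.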
dw = 1 / self.m * np.dot(X.T, (y_predict - y))
db = 1 / self.m * np.sum(y_predict - y)
# gradient descent update step
self.weights -= self.lr * dw
self.bias -= self.lr * db
# print cost sometimes
if it % 1000 == 0:
print(f"Cost after iteration {it}: {cost}")
return self.weights, self.bias
def predict(self, X):
y_predict = self.sigmoid(np.dot(X, self.weights) + self.bias)
y_predict_labels = y_predict > 0.5
return y_predict_labels
def sigmoid(self, z):
return 1 / (1 + np.exp(-z))
if __name__ == "__main__":
np.random.seed(1)
X, y = make_blobs(n_samples=1000, centers=2)
y = y[:, np.newaxis]
logreg = LogisticRegression(X)
w, b = logreg.train(X, y)
y_predict = logreg.predict(X)
print(f"Accuracy: {np.sum(y==y_predict)/X.shape[0]}")

View File

@@ -0,0 +1,90 @@
1.1107, -2.1079
-0.5498, 0.0943
-0.0382, 1.8829
0.0555, -0.6139
0.5870, -1.2067
0.5453, 0.2509
-0.3927, -0.6220
-1.1905, -1.8785
-0.4240, 0.7772
-0.7139, 1.5846
-0.8883, 2.1408
-0.6922, 0.0993
1.4350, 1.2334
-0.7576, 0.7386
-1.1144, -1.7059
0.6612, -1.7296
-2.1381, -0.0600
1.3857, 1.2178
-1.4951, 0.0373
0.8029, 0.9739
1.5607, 1.5862
0.8563, -1.4245
0.0397, -1.3799
1.2331, 1.7421
-2.0015, 0.8355
-0.3428, -0.4780
-0.8891, 1.2634
0.3832, -0.1189
0.4172, 1.0132
-0.8695, -0.7947
2.9737, 3.6438
3.7680, 1.8649
0.1166, 0.9435
0.6896, 3.9160
1.2234, 2.9899
2.3009, 0.4150
3.7693, 3.8027
1.9450, 3.4208
0.9290, 3.3611
5.0027, 2.7870
1.0101, 1.8737
2.0751, 2.2628
1.9113, 3.6777
2.3127, 3.9130
1.9392, 2.3976
3.1218, 2.5495
1.7032, 1.1509
0.4212, 3.5322
2.7686, 0.9402
2.1696, 2.9285
0.3380, 2.0947
3.6886, 0.4054
2.6315, 3.1962
-0.5332, 3.1421
0.3380, 3.0801
1.4030, 1.1841
2.8739, 2.7777
1.1254, 3.2404
0.0988, 1.9522
0.3688, 2.8904
1.4758, -1.6387
1.9289, -1.8191
2.5741, -1.3213
2.1917, -1.2852
0.8358, -2.3349
2.6863, -1.8834
3.1102, -0.4854
3.7073, -0.6466
3.6394, -0.4097
0.5365, -3.6555
2.9295, -0.3819
0.8168, -3.1133
1.3432, -1.7717
1.1039, -2.2261
1.3754, -2.2236
0.6757, -2.5379
-0.2029, -3.8420
2.4210, -1.9788
1.0335, -2.6042
0.9638, -2.9449
-0.8198, -5.4449
1.9552, -1.5530
0.3505, -3.1887
2.4943, -1.8116
1.9761, -1.0664
0.5994, -3.0513
2.2076, -1.6728
1.9941, -1.8826
1.7487, -2.9644
1.4160, -2.4234

View File

@@ -0,0 +1,90 @@
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3

View File

@@ -0,0 +1,67 @@
"""
Naive Bayes Classifier Implementation from scratch
To run the code, structure your data in the following way:
X be size: (num_training_examples, num_features)
y be size: (num_training_examples, )
Where the classes are 0, 1, 2, etc. Then an example run looks like:
NB = NaiveBayes(X, y)
NB.fit(X)
predictions = NB.predict(X)
Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-04-21 Initial coding
"""
import numpy as np
class NaiveBayes:
def __init__(self, X, y):
        self.num_examples, self.num_features = X.shape
        self.num_classes = len(np.unique(y))
        # store the labels so that fit() does not rely on a global variable
        self.y = y
        self.eps = 1e-6
def fit(self, X):
self.classes_mean = {}
self.classes_variance = {}
self.classes_prior = {}
for c in range(self.num_classes):
            X_c = X[self.y == c]
self.classes_mean[str(c)] = np.mean(X_c, axis=0)
self.classes_variance[str(c)] = np.var(X_c, axis=0)
self.classes_prior[str(c)] = X_c.shape[0] / X.shape[0]
def predict(self, X):
probs = np.zeros((self.num_examples, self.num_classes))
for c in range(self.num_classes):
prior = self.classes_prior[str(c)]
probs_c = self.density_function(
X, self.classes_mean[str(c)], self.classes_variance[str(c)]
)
probs[:, c] = probs_c + np.log(prior)
return np.argmax(probs, 1)
def density_function(self, x, mean, sigma):
        # Calculate the log of the Gaussian density function
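        # For a single feature with variance s: log N(x; mu, s) = -0.5*log(2*pi*s) - (x - mu)^2 / (2*s);
        # the terms below sum this over all features, with eps added for numerical stability.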
const = -self.num_features / 2 * np.log(2 * np.pi) - 0.5 * np.sum(
np.log(sigma + self.eps)
)
probs = 0.5 * np.sum(np.power(x - mean, 2) / (sigma + self.eps), 1)
return const - probs
if __name__ == "__main__":
X = np.loadtxt("example_data/data.txt", delimiter=",")
y = np.loadtxt("example_data/targets.txt") - 1
NB = NaiveBayes(X, y)
NB.fit(X)
y_pred = NB.predict(X)
print(f"Accuracy: {sum(y_pred==y)/X.shape[0]}")

View File

@@ -0,0 +1,174 @@
"""
Simple two-layered Neural Network from scratch implementation.
Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-04-28 Initial coding
"""
import numpy as np
from utils import create_dataset, plot_contour
class NeuralNetwork:
def __init__(self, X, y):
# m for #training examples and n for #features
self.m, self.n = X.shape
# regularization term lambd (lambda is reserved keyword)
self.lambd = 1e-3
self.learning_rate = 0.1
        # Define the sizes of the hidden layer and of the output layer
self.h1 = 25
self.h2 = len(np.unique(y))
def init_kaiming_weights(self, l0, l1):
        # Kaiming/He initialization: scale weights by sqrt(2 / fan_in), well suited for ReLU
w = np.random.randn(l0, l1) * np.sqrt(2.0 / l0)
b = np.zeros((1, l1))
return w, b
def forward_prop(self, X, parameters):
W2 = parameters["W2"]
W1 = parameters["W1"]
b2 = parameters["b2"]
b1 = parameters["b1"]
# forward prop
a0 = X
z1 = np.dot(a0, W1) + b1
# apply nonlinearity (relu)
a1 = np.maximum(0, z1)
z2 = np.dot(a1, W2) + b2
# softmax on the last layer
scores = z2
exp_scores = np.exp(scores)
probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
# cache values from forward pass to use for backward pass
cache = {"a0": X, "probs": probs, "a1": a1}
return cache, probs
def compute_cost(self, y, probs, parameters):
W2 = parameters["W2"]
W1 = parameters["W1"]
y = y.astype(int)
data_loss = np.sum(-np.log(probs[np.arange(self.m), y]) / self.m)
reg_loss = 0.5 * self.lambd * np.sum(W1 * W1) + 0.5 * self.lambd * np.sum(
W2 * W2
)
# total cost J
total_cost = data_loss + reg_loss
return total_cost
def back_prop(self, cache, parameters, y):
# Unpack from parameters
W2 = parameters["W2"]
W1 = parameters["W1"]
b2 = parameters["b2"]
b1 = parameters["b1"]
# Unpack from forward prop
a0 = cache["a0"]
a1 = cache["a1"]
probs = cache["probs"]
dz2 = probs
dz2[np.arange(self.m), y] -= 1
dz2 /= self.m
# backprop through values dW2 and db2
dW2 = np.dot(a1.T, dz2) + self.lambd * W2
db2 = np.sum(dz2, axis=0, keepdims=True)
# Back to the (only) hidden layer in this case
dz1 = np.dot(dz2, W2.T)
dz1 = dz1 * (a1 > 0)
# backprop through values dW1, db1
dW1 = np.dot(a0.T, dz1) + self.lambd * W1
db1 = np.sum(dz1, axis=0, keepdims=True)
grads = {"dW1": dW1, "dW2": dW2, "db1": db1, "db2": db2}
return grads
def update_parameters(self, parameters, grads):
learning_rate = self.learning_rate
W2 = parameters["W2"]
W1 = parameters["W1"]
b2 = parameters["b2"]
b1 = parameters["b1"]
dW2 = grads["dW2"]
dW1 = grads["dW1"]
db2 = grads["db2"]
db1 = grads["db1"]
# Do gradient descent step
W2 -= learning_rate * dW2
W1 -= learning_rate * dW1
b2 -= learning_rate * db2
b1 -= learning_rate * db1
# store back weights in parameters
parameters = {"W1": W1, "W2": W2, "b1": b1, "b2": b2}
return parameters
def main(self, X, y, num_iter=10000):
# initialize our weights
W1, b1 = self.init_kaiming_weights(self.n, self.h1)
W2, b2 = self.init_kaiming_weights(self.h1, self.h2)
# pack parameters into a dictionary
parameters = {"W1": W1, "W2": W2, "b1": b1, "b2": b2}
# How many gradient descent updates we want to do
for it in range(num_iter + 1):
# forward prop
cache, probs = self.forward_prop(X, parameters)
# calculate cost
cost = self.compute_cost(y, probs, parameters)
# print cost sometimes
if it % 2500 == 0:
print(f"At iteration {it} we have a cost of {cost}")
# back prop
grads = self.back_prop(cache, parameters, y)
# update parameters
parameters = self.update_parameters(parameters, grads)
return parameters
if __name__ == "__main__":
# Generate dataset
X, y = create_dataset(300, K=3)
y = y.astype(int)
# Train network
NN = NeuralNetwork(X, y)
trained_parameters = NN.main(X, y)
# Get trained parameters
W2 = trained_parameters["W2"]
W1 = trained_parameters["W1"]
b2 = trained_parameters["b2"]
b1 = trained_parameters["b1"]
# Plot the decision boundary (for nice visualization)
plot_contour(X, y, NN, trained_parameters)

View File

@@ -0,0 +1,50 @@
"""
These were (shamelessly) taken from cs231n course github code.
I believe these were coded by Andrej Karpathy so credit goes to him
for coding these.
"""
import numpy as np
import matplotlib.pyplot as plt
def create_dataset(N, K=2):
    # N is the number of points per class, D the dimensionality
    D = 2
X = np.zeros((N * K, D)) # data matrix (each row = single example)
y = np.zeros(N * K) # class labels
for j in range(K):
ix = range(N * j, N * (j + 1))
r = np.linspace(0, 1, N) # radius
t = np.linspace(j * 4, (j + 1) * 4, N) + np.random.randn(N) * 0.2
X[ix] = np.c_[r * np.sin(t), r * np.cos(t)]
y[ix] = j
    # let's visualize the data:
plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.Spectral)
plt.show()
return X, y
def plot_contour(X, y, model, parameters):
# plot the resulting classifier
h = 0.02
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
points = np.c_[xx.ravel(), yy.ravel()]
# forward prop with our trained parameters
_, Z = model.forward_prop(points, parameters)
# classify into highest prob
Z = np.argmax(Z, axis=1)
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral, alpha=0.8)
    # plot the points
plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.Spectral)
# fig.savefig('spiral_net.png')

View File

@@ -0,0 +1,90 @@
1.1107, -2.1079, 1
-0.5498, 0.0943, 1
-0.0382, 1.8829,1
0.0555, -0.6139,1
0.5870, -1.2067,1
0.5453, 0.2509,1
-0.3927, -0.6220,1
-1.1905, -1.8785,1
-0.4240, 0.7772,1
-0.7139, 1.5846,1
-0.8883, 2.1408,1
-0.6922, 0.0993,1
1.4350, 1.2334,1
-0.7576, 0.7386,1
-1.1144, -1.7059,1
0.6612, -1.7296,1
-2.1381, -0.0600,1
1.3857, 1.2178,1
-1.4951, 0.0373,1
0.8029, 0.9739,1
1.5607, 1.5862,1
0.8563, -1.4245,1
0.0397, -1.3799,1
1.2331, 1.7421,1
-2.0015, 0.8355,1
-0.3428, -0.4780,1
-0.8891, 1.2634,1
0.3832, -0.1189,1
0.4172, 1.0132,1
-0.8695, -0.7947,1
2.9737, 3.6438,2
3.7680, 1.8649,2
0.1166, 0.9435,2
0.6896, 3.9160,2
1.2234, 2.9899,2
2.3009, 0.4150,2
3.7693, 3.8027,2
1.9450, 3.4208,2
0.9290, 3.3611,2
5.0027, 2.7870,2
1.0101, 1.8737,2
2.0751, 2.2628,2
1.9113, 3.6777,2
2.3127, 3.9130,2
1.9392, 2.3976,2
3.1218, 2.5495,2
1.7032, 1.1509,2
0.4212, 3.5322,2
2.7686, 0.9402,2
2.1696, 2.9285,2
0.3380, 2.0947,2
3.6886, 0.4054,2
2.6315, 3.1962,2
-0.5332, 3.1421,2
0.3380, 3.0801,2
1.4030, 1.1841,2
2.8739, 2.7777,2
1.1254, 3.2404,2
0.0988, 1.9522,2
0.3688, 2.8904,2
1.4758, -1.6387,3
1.9289, -1.8191,3
2.5741, -1.3213,3
2.1917, -1.2852,3
0.8358, -2.3349,3
2.6863, -1.8834,3
3.1102, -0.4854,3
3.7073, -0.6466,3
3.6394, -0.4097,3
0.5365, -3.6555,3
2.9295, -0.3819,3
0.8168, -3.1133,3
1.3432, -1.7717,3
1.1039, -2.2261,3
1.3754, -2.2236,3
0.6757, -2.5379,3
-0.2029, -3.8420,3
2.4210, -1.9788,3
1.0335, -2.6042,3
0.9638, -2.9449,3
-0.8198, -5.4449,3
1.9552, -1.5530,3
0.3505, -3.1887,3
2.4943, -1.8116,3
1.9761, -1.0664,3
0.5994, -3.0513,3
2.2076, -1.6728,3
1.9941, -1.8826,3
1.7487, -2.9644,3
1.4160, -2.4234,3

View File

@@ -0,0 +1,100 @@
701,478,227,863,963,2
96,147,210,493,586,2
798,143,431,541,94,1
233,146,667,886,771,1
668,815,628,429,387,3
718,456,883,281,840,1
182,837,144,664,460,2
882,533,203,776,56,3
648,715,288,619,293,1
178,951,965,164,1,3
270,432,457,978,794,1
335,219,596,763,231,1
47,477,78,423,616,3
324,969,514,55,722,2
824,571,159,516,594,2
837,667,957,150,508,3
833,945,311,12,859,1
536,280,21,292,518,1
943,55,709,269,425,1
593,178,861,130,26,3
54,165,3,638,816,2
637,861,423,855,98,1
222,502,427,944,732,1
8,465,403,376,761,2
184,602,673,825,741,1
639,677,204,385,236,2
176,843,479,952,898,2
125,626,553,74,1000,3
302,495,294,362,169,2
131,912,803,232,852,1
117,609,290,133,357,2
207,812,788,182,494,1
954,76,257,620,844,1
287,266,195,30,344,3
440,590,324,868,969,3
831,290,228,586,971,1
567,734,460,429,689,1
864,939,191,620,431,1
905,337,200,400,77,2
304,997,141,208,615,3
19,280,187,44,639,1
280,279,275,305,123,1
866,519,331,241,972,1
27,77,860,458,643,3
486,713,917,324,855,2
466,16,897,222,731,1
712,230,215,805,341,1
300,100,292,978,115,3
938,800,911,345,49,3
98,593,43,583,684,1
348,479,406,605,595,2
892,877,592,339,615,3
203,53,995,704,927,2
991,968,886,43,883,1
733,939,71,388,56,1
249,376,830,628,812,2
4,877,743,242,266,1
95,537,106,490,518,2
870,704,430,270,327,2
402,97,283,569,638,3
537,979,966,729,8,3
399,51,285,973,509,1
662,951,947,923,112,3
71,573,9,305,351,2
240,837,836,277,177,1
513,318,709,435,367,2
553,253,980,868,26,1
848,543,171,420,73,1
449,538,720,347,500,2
42,319,830,447,727,2
165,968,151,672,452,3
1,781,142,137,157,2
907,364,776,490,502,2
146,512,87,344,233,3
478,62,55,815,283,3
751,789,112,277,483,1
189,597,866,73,397,3
607,210,327,538,68,2
337,401,557,667,642,1
249,894,84,81,643,1
896,858,568,345,157,1
362,886,558,531,735,1
865,418,866,824,370,3
14,517,514,257,129,2
845,833,998,211,684,2
289,302,416,364,920,2
383,173,991,815,368,3
652,325,903,471,224,3
757,580,974,667,620,1
946,247,684,191,332,2
63,330,199,280,608,2
752,298,95,143,134,2
987,105,747,931,413,3
510,23,385,711,701,1
326,195,651,727,85,3
214,978,396,428,14,1
646,133,388,896,971,1
849,817,294,491,397,2
854,973,274,315,897,3
666,530,683,234,439,1

View File

@@ -0,0 +1,90 @@
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3

View File

@@ -0,0 +1,170 @@
"""
Author: Philip Andreadis
e-mail: philip_andreadis@hotmail.com
Implementation of a Random Forest model from scratch.
The DecisionTree class from this project is used to generate the trees of the random forest.
The DecisionTree class itself is unchanged; the dataset is split into a number of folds, each with a random subset of features, and each tree is trained on one fold.
As a result each tree is trained on a different subset of the dataset, in order to reduce correlation between the trees.
The predicted class value of each instance is chosen by majority vote over the individual trees' predictions.
Parameters of the model:
MAX_DEPTH (int): Maximum depth of the decision tree
MIN_NODE (int): Minimum number of instances a node can have. If a node has this many instances or fewer, it becomes a terminal node
FOLD_SIZE (int): Value between 1 and 10; each fold's size is FOLD_SIZE x 10% of the original dataset (e.g. 3 means 30%).
N_TREES (int): The total number of trees that will be trained.
Input dataset to train() function must be a numpy array containing both feature and label values.
"""
from random import randrange
from random import randint
import numpy as np
from decision_tree import DecisionTree
# fold size in tenths of the dataset size, e.g. 3 means 30%
FOLD_SIZE = 10
# number of trees
N_TREES = 20
# max tree depth
MAX_DEPTH = 30
# min size of tree node
MIN_NODE = 1
class RandomForest:
def __init__(self,n_trees,fold_size):
self.n_trees = n_trees
self.fold_size = fold_size
self.trees = list()
"""
    This function splits the given dataset into n folds, sampling with replacement. The number of folds is equal to the number of trees that will be trained.
    Each tree takes one fold as input. The size of each fold is a percentage (p) of the size of the original dataset.
    Parameters:
    dataset: np array of the given dataset
    n_folds (int): number of folds into which the dataset should be split. Must be equal to the number of trees the user wants to train
    p (int): fold size expressed in tenths of the dataset size (e.g. 3 means 30%)
Returns list of np arrays: list with the k-folds
"""
def cross_validation_split(self,dataset, n_folds, p):
dataset_split = list()
fold_size = int(len(dataset)*p/10)
for i in range(n_folds):
fold = list()
while len(fold) < fold_size:
index = randrange(len(dataset))
fold.append(dataset[index])
            fold_arr = np.array(fold)
            dataset_split.append(fold_arr)
return dataset_split
"""
This function randomizes the selection of the features each tree will be trained on.
Parameters:
splits list of np arrays: list of folds
Returns list of np arrays: list with the k-folds with some features randomly removed
"""
def randomize_features(self,splits):
dataset_split = list()
l = len(splits[0][0])
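        # n_features here is the number of feature columns to randomly remove from
        # each fold (roughly half of them, never touching the label column)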
n_features = int((l-1)*5/10)
for split in splits:
for i in range(n_features):
rng = list(range(len(split[0]) - 1))
selected = rng.pop(randint(0,len(rng)-1))
split = np.delete(split, selected, 1)
            split_arr = np.array(split)
            dataset_split.append(split_arr)
return dataset_split
"""
Prints out all the decision trees of the random forest.
BUG: The feature number is not representative of its initial enumeration in the original dataset due to the randomization.
This means that we do not know on which features each tree is trained on.
"""
def print_trees(self):
i = 1
for t in self.trees:
print("Tree#",i)
temp = t.final_tree
t.print_dt(temp)
print("\n")
i = i+1
"""
Iteratively train each decision tree.
Parameters:
X (np.array): Training data
"""
def train(self,X):
train_x = self.cross_validation_split(X,self.n_trees,self.fold_size)
train_x = self.randomize_features(train_x)
for fold in train_x:
dt = DecisionTree(MAX_DEPTH, MIN_NODE)
dt.train(fold)
self.trees.append(dt)
"""
This function outputs the class value for each instance of the given dataset as predicted by the random forest algorithm.
Parameters:
X (np.array): Dataset with labels
Returns y (np.array): array with the predicted class values of the dataset
"""
def predict(self,X):
predicts = list()
final_predicts = list()
for tree in self.trees:
predicts.append(tree.predict(X))
# iterate through each tree's class prediction and find the most frequent for each instance
for i in range(len(predicts[0])):
values = list()
for j in range(len(predicts)):
values.append(predicts[j][i])
final_predicts.append(max(set(values), key=values.count))
return final_predicts,predicts
if __name__ == "__main__":
# Training data
train_data = np.loadtxt("example_data/data.txt", delimiter=",")
train_y = np.loadtxt("example_data/targets.txt")
mock_train = np.loadtxt("example_data/mock_data.csv", delimiter=",")
mock_y = mock_train[ : , -1]
# Build and train model
rf = RandomForest(N_TREES,FOLD_SIZE)
rf.train(mock_train)
# Evaluate model on training data
y_pred,y_pred_ind = rf.predict(mock_train)
print(f"Accuracy of random forest: {sum(y_pred == mock_y) / mock_y.shape[0]}")
print("\nAccuracy for each individual tree:")
c = 1
for i in y_pred_ind:
print("\nTree",c)
print(f"Accuracy: {sum(i == mock_y) / mock_y.shape[0]}")
c = c+1

Binary file not shown.

Binary file not shown.

96
ML/algorithms/svm/svm.py Normal file
View File

@@ -0,0 +1,96 @@
"""
Implementation of SVM using cvxopt package. Implementation uses
soft margin and I've defined linear, polynomial and gaussian kernels.
To understand the theory (which is a bit challenging) I recommend reading the following:
http://cs229.stanford.edu/notes/cs229-notes3.pdf
https://www.youtube.com/playlist?list=PLoROMvodv4rMiGQp3WXShtMGgzqpfVfbU (Lectures 6,7 by Andrew Ng)
To understand how to reformulate the optimization problem we obtain
to get the input to cvxopt QP solver this blogpost can be useful:
https://xavierbourretsicotte.github.io/SVM_implementation.html
Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-04-26 Initial coding
"""
import numpy as np
import cvxopt
from utils import create_dataset, plot_contour
def linear(x, z):
return np.dot(x, z.T)
def polynomial(x, z, p=5):
return (1 + np.dot(x, z.T)) ** p
def gaussian(x, z, sigma=0.1):
return np.exp(-np.linalg.norm(x - z, axis=1) ** 2 / (2 * (sigma ** 2)))
class SVM:
def __init__(self, kernel=gaussian, C=1):
self.kernel = kernel
self.C = C
def fit(self, X, y):
self.y = y
self.X = X
m, n = X.shape
# Calculate Kernel
self.K = np.zeros((m, m))
for i in range(m):
self.K[i, :] = self.kernel(X[i, np.newaxis], self.X)
        # Solve with cvxopt: the dual QP needs to be reformulated
        # to match the input form expected by cvxopt.solvers.qp
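        # For reference, the dual problem solved here is
        #   min_alpha  (1/2) * alpha^T (y y^T * K) alpha - 1^T alpha
        #   s.t.  0 <= alpha_i <= C  and  y^T alpha = 0,
        # mapped onto cvxopt's standard form min (1/2) x^T P x + q^T x, G x <= h, A x = b.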
P = cvxopt.matrix(np.outer(y, y) * self.K)
q = cvxopt.matrix(-np.ones((m, 1)))
G = cvxopt.matrix(np.vstack((np.eye(m) * -1, np.eye(m))))
h = cvxopt.matrix(np.hstack((np.zeros(m), np.ones(m) * self.C)))
A = cvxopt.matrix(y, (1, m), "d")
b = cvxopt.matrix(np.zeros(1))
cvxopt.solvers.options["show_progress"] = False
sol = cvxopt.solvers.qp(P, q, G, h, A, b)
self.alphas = np.array(sol["x"])
def predict(self, X):
y_predict = np.zeros((X.shape[0]))
sv = self.get_parameters(self.alphas)
for i in range(X.shape[0]):
y_predict[i] = np.sum(
self.alphas[sv]
* self.y[sv, np.newaxis]
* self.kernel(X[i], self.X[sv])[:, np.newaxis]
)
return np.sign(y_predict + self.b)
def get_parameters(self, alphas):
threshold = 1e-5
sv = ((alphas > threshold) * (alphas < self.C)).flatten()
        self.w = np.dot(self.X[sv].T, alphas[sv] * self.y[sv, np.newaxis])
self.b = np.mean(
self.y[sv, np.newaxis]
- self.alphas[sv] * self.y[sv, np.newaxis] * self.K[sv, sv][:, np.newaxis]
)
return sv
if __name__ == "__main__":
np.random.seed(1)
X, y = create_dataset(N=50)
svm = SVM(kernel=gaussian)
svm.fit(X, y)
y_pred = svm.predict(X)
plot_contour(X, y, svm)
print(f"Accuracy: {sum(y==y_pred)/y.shape[0]}")

View File

@@ -0,0 +1,47 @@
"""
These were (shamelessly) taken from cs231n course github code.
I believe these were coded by Andrej Karpathy so credit goes to him
for coding these.
"""
import numpy as np
import matplotlib.pyplot as plt
def create_dataset(N, D=2, K=2):
X = np.zeros((N * K, D)) # data matrix (each row = single example)
y = np.zeros(N * K) # class labels
for j in range(K):
ix = range(N * j, N * (j + 1))
r = np.linspace(0.0, 1, N) # radius
t = np.linspace(j * 4, (j + 1) * 4, N) + np.random.randn(N) * 0.2 # theta
X[ix] = np.c_[r * np.sin(t), r * np.cos(t)]
y[ix] = j
    # let's visualize the data:
plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.Spectral)
plt.show()
y[y == 0] -= 1
return X, y
def plot_contour(X, y, svm):
# plot the resulting classifier
h = 0.01
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
points = np.c_[xx.ravel(), yy.ravel()]
Z = svm.predict(points)
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral, alpha=0.8)
    # plot the points
plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.Spectral)
plt.show()