Initial commit

This commit is contained in:
Aladdin Persson
2021-01-30 21:49:15 +01:00
commit 65b8c80495
432 changed files with 1290844 additions and 0 deletions

Binary file not shown.

View File

@@ -0,0 +1,90 @@
1.1107, -2.1079
-0.5498, 0.0943
-0.0382, 1.8829
0.0555, -0.6139
0.5870, -1.2067
0.5453, 0.2509
-0.3927, -0.6220
-1.1905, -1.8785
-0.4240, 0.7772
-0.7139, 1.5846
-0.8883, 2.1408
-0.6922, 0.0993
1.4350, 1.2334
-0.7576, 0.7386
-1.1144, -1.7059
0.6612, -1.7296
-2.1381, -0.0600
1.3857, 1.2178
-1.4951, 0.0373
0.8029, 0.9739
1.5607, 1.5862
0.8563, -1.4245
0.0397, -1.3799
1.2331, 1.7421
-2.0015, 0.8355
-0.3428, -0.4780
-0.8891, 1.2634
0.3832, -0.1189
0.4172, 1.0132
-0.8695, -0.7947
2.9737, 3.6438
3.7680, 1.8649
0.1166, 0.9435
0.6896, 3.9160
1.2234, 2.9899
2.3009, 0.4150
3.7693, 3.8027
1.9450, 3.4208
0.9290, 3.3611
5.0027, 2.7870
1.0101, 1.8737
2.0751, 2.2628
1.9113, 3.6777
2.3127, 3.9130
1.9392, 2.3976
3.1218, 2.5495
1.7032, 1.1509
0.4212, 3.5322
2.7686, 0.9402
2.1696, 2.9285
0.3380, 2.0947
3.6886, 0.4054
2.6315, 3.1962
-0.5332, 3.1421
0.3380, 3.0801
1.4030, 1.1841
2.8739, 2.7777
1.1254, 3.2404
0.0988, 1.9522
0.3688, 2.8904
1.4758, -1.6387
1.9289, -1.8191
2.5741, -1.3213
2.1917, -1.2852
0.8358, -2.3349
2.6863, -1.8834
3.1102, -0.4854
3.7073, -0.6466
3.6394, -0.4097
0.5365, -3.6555
2.9295, -0.3819
0.8168, -3.1133
1.3432, -1.7717
1.1039, -2.2261
1.3754, -2.2236
0.6757, -2.5379
-0.2029, -3.8420
2.4210, -1.9788
1.0335, -2.6042
0.9638, -2.9449
-0.8198, -5.4449
1.9552, -1.5530
0.3505, -3.1887
2.4943, -1.8116
1.9761, -1.0664
0.5994, -3.0513
2.2076, -1.6728
1.9941, -1.8826
1.7487, -2.9644
1.4160, -2.4234

View File

@@ -0,0 +1,90 @@
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3

110
ML/algorithms/knn/knn.py Normal file
View File

@@ -0,0 +1,110 @@
"""
Implementation of K-nearest neighbor (KNN) from scratch
where you can either use 2-loops (inefficient), 1-loop (better)
or a heavily vectorized zero-loop implementation.
Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
* 2020-04-24 Initial coding
"""
import numpy as np
class KNearestNeighbor:
    """k-nearest-neighbor classifier over Euclidean distance.

    A lazy learner: ``train`` only memorizes the data, and all work happens
    at prediction time. Three interchangeable distance routines are provided
    (two explicit loops, one loop, and a fully vectorized version) that
    produce identical results at very different speeds.
    """

    def __init__(self, k):
        self.k = k        # number of neighbors consulted per prediction
        self.eps = 1e-8   # tiny cushion added under the sqrt for numerical safety

    def train(self, X, y):
        """Store the training examples ``X`` and their labels ``y`` verbatim."""
        self.X_train = X
        self.y_train = y

    def predict(self, X_test, num_loops=0):
        """Return a predicted label for every row of ``X_test``.

        ``num_loops`` selects the distance implementation:
        0 -> vectorized, 1 -> single loop, anything else -> double loop.
        """
        if num_loops == 1:
            dists = self.compute_distance_one_loop(X_test)
        elif num_loops == 0:
            dists = self.compute_distance_vectorized(X_test)
        else:
            dists = self.compute_distance_two_loops(X_test)
        return self.predict_labels(dists)

    def compute_distance_two_loops(self, X_test):
        """Naive pairwise distances via a double loop — educational only.

        (The sqrt is not strictly needed: it is monotone, so the nearest
        neighbors would be the same without it.)
        """
        n_test = X_test.shape[0]
        n_train = self.X_train.shape[0]
        distances = np.zeros((n_test, n_train))
        for r in range(n_test):
            for c in range(n_train):
                diff = X_test[r, :] - self.X_train[c, :]
                distances[r, c] = np.sqrt(self.eps + np.sum(diff ** 2))
        return distances

    def compute_distance_one_loop(self, X_test):
        """One loop over test rows; broadcasting handles the training axis.

        (Same monotone-sqrt note as the two-loop version applies.)
        """
        n_test = X_test.shape[0]
        distances = np.zeros((n_test, self.X_train.shape[0]))
        for r in range(n_test):
            row_diff = self.X_train - X_test[r, :]
            distances[r, :] = np.sqrt(self.eps + (row_diff ** 2).sum(axis=1))
        return distances

    def compute_distance_vectorized(self, X_test):
        """Zero-loop pairwise distances.

        Uses the expansion ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2 applied to
        every test/train pair at once via broadcasting.
        """
        test_sq = (X_test ** 2).sum(axis=1, keepdims=True)        # (n_test, 1)
        train_sq = (self.X_train ** 2).sum(axis=1, keepdims=True)  # (n_train, 1)
        cross = X_test @ self.X_train.T                            # (n_test, n_train)
        return np.sqrt(self.eps + test_sq - 2 * cross + train_sq.T)

    def predict_labels(self, distances):
        """Majority vote among the k nearest training labels per test row."""
        y_pred = np.zeros(distances.shape[0])
        for i, row in enumerate(distances):
            nearest = np.argsort(row)[: self.k]
            votes = self.y_train[nearest].astype(int)
            # bincount + argmax picks the most frequent class
            # (ties go to the smallest label, matching np.argmax semantics)
            y_pred[i] = np.argmax(np.bincount(votes))
        return y_pred
if __name__ == "__main__":
    # Tiny hand-made sanity check: two clearly separated classes.
    # NOTE(review): the original also ran np.loadtxt on
    # example_data/data.txt and example_data/targets.txt, then immediately
    # overwrote both results with the arrays below — the file loads were
    # dead code and would crash when run outside that directory, so they
    # were removed.
    X = np.array([[1, 1], [3, 1], [1, 4], [2, 4], [3, 3], [5, 1]])
    y = np.array([0, 0, 0, 1, 1, 1])

    KNN = KNearestNeighbor(k=1)
    KNN.train(X, y)
    y_pred = KNN.predict(X, num_loops=0)
    # With k=1 evaluated on the training set this should print 1.0
    print(f"Accuracy: {sum(y_pred == y) / y.shape[0]}")