mirror of https://github.com/aladdinpersson/Machine-Learning-Collection.git
synced 2026-02-21 11:18:01 +00:00

Initial commit
BIN  ML/algorithms/knn/__pycache__/utils.cpython-37.pyc (Normal file)
Binary file not shown.
90  ML/algorithms/knn/example_data/data.txt (Normal file)
@@ -0,0 +1,90 @@
1.1107, -2.1079
-0.5498, 0.0943
-0.0382, 1.8829
0.0555, -0.6139
0.5870, -1.2067
0.5453, 0.2509
-0.3927, -0.6220
-1.1905, -1.8785
-0.4240, 0.7772
-0.7139, 1.5846
-0.8883, 2.1408
-0.6922, 0.0993
1.4350, 1.2334
-0.7576, 0.7386
-1.1144, -1.7059
0.6612, -1.7296
-2.1381, -0.0600
1.3857, 1.2178
-1.4951, 0.0373
0.8029, 0.9739
1.5607, 1.5862
0.8563, -1.4245
0.0397, -1.3799
1.2331, 1.7421
-2.0015, 0.8355
-0.3428, -0.4780
-0.8891, 1.2634
0.3832, -0.1189
0.4172, 1.0132
-0.8695, -0.7947
2.9737, 3.6438
3.7680, 1.8649
0.1166, 0.9435
0.6896, 3.9160
1.2234, 2.9899
2.3009, 0.4150
3.7693, 3.8027
1.9450, 3.4208
0.9290, 3.3611
5.0027, 2.7870
1.0101, 1.8737
2.0751, 2.2628
1.9113, 3.6777
2.3127, 3.9130
1.9392, 2.3976
3.1218, 2.5495
1.7032, 1.1509
0.4212, 3.5322
2.7686, 0.9402
2.1696, 2.9285
0.3380, 2.0947
3.6886, 0.4054
2.6315, 3.1962
-0.5332, 3.1421
0.3380, 3.0801
1.4030, 1.1841
2.8739, 2.7777
1.1254, 3.2404
0.0988, 1.9522
0.3688, 2.8904
1.4758, -1.6387
1.9289, -1.8191
2.5741, -1.3213
2.1917, -1.2852
0.8358, -2.3349
2.6863, -1.8834
3.1102, -0.4854
3.7073, -0.6466
3.6394, -0.4097
0.5365, -3.6555
2.9295, -0.3819
0.8168, -3.1133
1.3432, -1.7717
1.1039, -2.2261
1.3754, -2.2236
0.6757, -2.5379
-0.2029, -3.8420
2.4210, -1.9788
1.0335, -2.6042
0.9638, -2.9449
-0.8198, -5.4449
1.9552, -1.5530
0.3505, -3.1887
2.4943, -1.8116
1.9761, -1.0664
0.5994, -3.0513
2.2076, -1.6728
1.9941, -1.8826
1.7487, -2.9644
1.4160, -2.4234
90  ML/algorithms/knn/example_data/targets.txt (Normal file)
@@ -0,0 +1,90 @@
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
110  ML/algorithms/knn/knn.py (Normal file)
@@ -0,0 +1,110 @@
"""
Implementation of K-nearest neighbor (KNN) from scratch,
where you can use either two loops (inefficient), one loop (better),
or a heavily vectorized zero-loop implementation.

Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
*   2020-04-24 Initial coding
"""

import numpy as np


class KNearestNeighbor:
    def __init__(self, k):
        self.k = k
        # Small constant added under every sqrt; it keeps the vectorized
        # formula from taking the square root of a slightly negative number
        # produced by floating-point cancellation.
        self.eps = 1e-8

    def train(self, X, y):
        # kNN has no real training phase: just memorize the training data.
        self.X_train = X
        self.y_train = y

    def predict(self, X_test, num_loops=0):
        if num_loops == 0:
            distances = self.compute_distance_vectorized(X_test)
        elif num_loops == 1:
            distances = self.compute_distance_one_loop(X_test)
        else:
            distances = self.compute_distance_two_loops(X_test)

        return self.predict_labels(distances)

    def compute_distance_two_loops(self, X_test):
        """
        Inefficient naive implementation; use it only
        as a way of understanding what kNN is doing.
        """
        num_test = X_test.shape[0]
        num_train = self.X_train.shape[0]
        distances = np.zeros((num_test, num_train))

        for i in range(num_test):
            for j in range(num_train):
                # (Taking sqrt is not necessary: the nearest neighbors won't change since sqrt is monotone)
                distances[i, j] = np.sqrt(
                    self.eps + np.sum((X_test[i, :] - self.X_train[j, :]) ** 2)
                )

        return distances

    def compute_distance_one_loop(self, X_test):
        """
        Much better than two loops, but not as fast as the fully vectorized version.
        Utilizes NumPy broadcasting in X_train - X_test[i, :].
        """
        num_test = X_test.shape[0]
        num_train = self.X_train.shape[0]
        distances = np.zeros((num_test, num_train))

        for i in range(num_test):
            # (Taking sqrt is not necessary: the nearest neighbors won't change since sqrt is monotone)
            distances[i, :] = np.sqrt(
                self.eps + np.sum((self.X_train - X_test[i, :]) ** 2, axis=1)
            )

        return distances
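    # Broadcasting note: self.X_train has shape (num_train, d) and
    # X_test[i, :] has shape (d,), so the subtraction broadcasts to
    # (num_train, d); summing over axis=1 then yields all num_train
    # squared distances for test point i in a single expression.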

    def compute_distance_vectorized(self, X_test):
        """
        This can be tricky to understand; we utilize heavy
        vectorization as well as NumPy broadcasting.
        Idea: for two vectors a and b (two examples) we can expand
        ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2. Doing this for every
        pair of test and train vectors at once leads to the heavily
        vectorized formula below, computing all distances at the same time.
        """
        X_test_squared = np.sum(X_test ** 2, axis=1, keepdims=True)
        X_train_squared = np.sum(self.X_train ** 2, axis=1, keepdims=True)
        X_test_dot_X_train = np.dot(X_test, self.X_train.T)

        # (Taking sqrt is not necessary: the nearest neighbors won't change since sqrt is monotone)
        return np.sqrt(
            self.eps + X_test_squared - 2 * X_test_dot_X_train + X_train_squared.T
        )
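    # Worked instance of the expansion: with a = (1, 2) and b = (3, 1),
    # ||a - b||^2 = (1-3)^2 + (2-1)^2 = 5, and equivalently
    # ||a||^2 - 2 a.b + ||b||^2 = 5 - 2*5 + 10 = 5.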

    def predict_labels(self, distances):
        num_test = distances.shape[0]
        y_pred = np.zeros(num_test)

        for i in range(num_test):
            # Indices of training points sorted by distance to test point i.
            y_indices = np.argsort(distances[i, :])
            # Labels of the k nearest neighbors (np.bincount needs non-negative ints).
            k_closest_classes = self.y_train[y_indices[: self.k]].astype(int)
            # Majority vote: the most frequent class among the k neighbors.
            y_pred[i] = np.argmax(np.bincount(k_closest_classes))

        return y_pred
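    # Voting example: with k = 3 and neighbor labels [2, 1, 2],
    # np.bincount gives [0, 1, 2] (occurrences of classes 0, 1, 2)
    # and np.argmax of that picks class 2 as the prediction.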


if __name__ == "__main__":
    X = np.loadtxt("example_data/data.txt", delimiter=",")
    y = np.loadtxt("example_data/targets.txt")

    # A tiny hand-made dataset that can be swapped in for quick debugging:
    # X = np.array([[1, 1], [3, 1], [1, 4], [2, 4], [3, 3], [5, 1]])
    # y = np.array([0, 0, 0, 1, 1, 1])

    KNN = KNearestNeighbor(k=1)
    KNN.train(X, y)
    y_pred = KNN.predict(X, num_loops=0)
    # Note: this evaluates on the training set, so with k=1 every point is
    # its own nearest neighbor and the accuracy is trivially 1.0.
    print(f"Accuracy: {sum(y_pred == y) / y.shape[0]}")