import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
# One colour per iris class (red, green, blue), used to colour the scatter plot.
cmap = ListedColormap(['#FF0000','#00FF00','#0000FF'])

# Load the iris data set: X holds the feature matrix, y the class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

# Scatter plot of feature columns 2 and 3, coloured by class label.
plt.figure()
plt.scatter(X[:,2],X[:,3], c=y, cmap=cmap, edgecolor='k', s=20)
plt.show()
from collections import Counter
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Args:
        x1: First point, a NumPy array or a plain sequence of numbers.
        x2: Second point, same length as ``x1``.

    Returns:
        The square root of the sum of squared coordinate differences.
    """
    # np.asarray lets plain lists/tuples be subtracted element-wise too.
    diff = np.asarray(x1) - np.asarray(x2)
    return np.sqrt(np.sum(diff ** 2))
class KNN:
    """k-nearest-neighbours classifier using Euclidean distance.

    Prediction is a majority vote among the labels of the ``k`` training
    samples closest to the query point.
    """

    def __init__(self, k=3):
        """Store ``k``, the number of neighbours that take part in the vote."""
        self.k = k

    def fit(self, X, y):
        """Memorise the training samples ``X`` and their labels ``y``.

        No computation happens here; all the work is done at prediction time.
        """
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Return a list with one predicted label per sample in ``X``."""
        return [self._predict(x) for x in X]

    def _predict(self, x):
        """Predict the label of a single sample ``x``.

        Expects the training data to be two-dimensional
        (one row per sample, one column per feature).
        """
        # Compute the Euclidean distance from x to every training sample in a
        # single vectorised step (row-wise sqrt of the sum of squared diffs).
        diffs = np.asarray(self.X_train) - np.asarray(x)
        distances = np.sqrt(np.sum(diffs ** 2, axis=1))

        # Get the indices, then the labels, of the k closest samples.
        k_indices = np.argsort(distances)[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]

        # Majority vote: most_common(1) yields [(label, count)] for the winner.
        return Counter(k_nearest_labels).most_common(1)[0][0]
from collections import Counter
import numpy as np
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Args:
        x1: First point, a NumPy array or a plain sequence of numbers.
        x2: Second point, same length as ``x1``.

    Returns:
        The square root of the sum of squared coordinate differences.
    """
    # np.asarray lets plain lists/tuples be subtracted element-wise too.
    diff = np.asarray(x1) - np.asarray(x2)
    return np.sqrt(np.sum(diff ** 2))
class KNN:
    """k-nearest-neighbours classifier using Euclidean distance.

    NOTE: in this version ``_predict`` returns the complete vote tally (a list
    of ``(label, count)`` pairs, most frequent first) rather than a single
    label, so ``predict`` yields a list of such tallies.
    """

    def __init__(self, k=3):
        """Store ``k``, the number of neighbours that take part in the vote."""
        self.k = k

    def fit(self, X, y):
        """Memorise the training samples ``X`` and their labels ``y``."""
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Return a list with one vote tally per sample in ``X``."""
        return [self._predict(x) for x in X]

    def _predict(self, x):
        """Return the vote tally of the k nearest neighbours of ``x``.

        Expects the training data to be two-dimensional
        (one row per sample, one column per feature).
        """
        # Compute the Euclidean distance from x to every training sample in a
        # single vectorised step (row-wise sqrt of the sum of squared diffs).
        diffs = np.asarray(self.X_train) - np.asarray(x)
        distances = np.sqrt(np.sum(diffs ** 2, axis=1))

        # Get the closest k indices.
        k_indices = np.argsort(distances)[:self.k]

        # Get the labels of the k nearest neighbors.
        k_nearest_labels = [self.y_train[i] for i in k_indices]

        # Vote tally: every (label, count) pair, ordered from most to least
        # common. The whole list is returned, not just the winning label.
        most_common = Counter(k_nearest_labels).most_common()
        return most_common
# Fit a 5-nearest-neighbour classifier on the training split and predict the
# held-out test samples.
clf = KNN(k=5)
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)

# With the KNN version defined just above, each entry printed here is a list
# of (label, count) pairs rather than a single label.
print(predictions)
[[(1, 4), (2, 1)], [(2, 3), (1, 2)], [(2, 5)], [(0, 5)], [(1, 5)], [(0, 5)], [(0, 5)], [(0, 5)], [(1, 5)], [(2, 5)], [(1, 5)], [(0, 5)], [(2, 5)], [(1, 5)], [(0, 5)], [(1, 5)], [(2, 5)], [(0, 5)], [(2, 5)], [(1, 5)], [(1, 5)], [(1, 5)], [(1, 5)], [(1, 5)], [(2, 5)], [(0, 5)], [(2, 4), (1, 1)], [(1, 5)], [(2, 5)], [(0, 5)]]
from collections import Counter
import numpy as np
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Args:
        x1: First point, a NumPy array or a plain sequence of numbers.
        x2: Second point, same length as ``x1``.

    Returns:
        The square root of the sum of squared coordinate differences.
    """
    # np.asarray lets plain lists/tuples be subtracted element-wise too.
    diff = np.asarray(x1) - np.asarray(x2)
    return np.sqrt(np.sum(diff ** 2))
class KNN:
    """k-nearest-neighbours classifier using Euclidean distance.

    Prediction is a majority vote among the labels of the ``k`` training
    samples closest to the query point.
    """

    def __init__(self, k=3):
        """Store ``k``, the number of neighbours that take part in the vote."""
        self.k = k

    def fit(self, X, y):
        """Memorise the training samples ``X`` and their labels ``y``.

        No computation happens here; all the work is done at prediction time.
        """
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Return a list with one predicted label per sample in ``X``."""
        return [self._predict(x) for x in X]

    def _predict(self, x):
        """Predict the label of a single sample ``x``.

        Expects the training data to be two-dimensional
        (one row per sample, one column per feature).
        """
        # Compute the Euclidean distance from x to every training sample in a
        # single vectorised step (row-wise sqrt of the sum of squared diffs).
        diffs = np.asarray(self.X_train) - np.asarray(x)
        distances = np.sqrt(np.sum(diffs ** 2, axis=1))

        # Get the closest k indices.
        k_indices = np.argsort(distances)[:self.k]

        # Get the labels of the k nearest neighbors.
        k_nearest_labels = [self.y_train[i] for i in k_indices]

        # Majority vote: most_common(1) yields [(label, count)] for the winner,
        # so [0][0] extracts just the winning class label.
        return Counter(k_nearest_labels).most_common(1)[0][0]
# Fit a 5-nearest-neighbour classifier on the training split and predict one
# class label per held-out test sample.
clf = KNN(k=5)
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
print(predictions)
[1, 2, 2, 0, 1, 0, 0, 0, 1, 2, 1, 0, 2, 1, 0, 1, 2, 0, 2, 1, 1, 1, 1, 1, 2, 0, 2, 1, 2, 0]
# Accuracy: the fraction of test samples whose predicted label equals the true
# label. Both sides are converted to arrays so the comparison is always
# element-wise, even if either one is a plain list.
acc = np.sum(np.asarray(predictions) == np.asarray(y_test)) / len(y_test)
print(acc)
0.9666666666666667
# Resources:
# - https://medium.com/@Khuranasoils/linear-regression-is-a-fundamental-statistical-method-used-for-modelling-the-relationship-between-a-e0544296fe56
# - https://www.youtube.com/watch?v=rTEtEy5o3X0