K-nearest neighbors (KNN): the unknown black dot is closest to the red cluster, so it is classified as red.
K is the number of points nearest to the unknown dot that are used to decide which cluster it belongs to.
#test
import numpy as np
import pandas as pd
import sklearn
from sklearn import linear_model, preprocessing
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils import shuffle
# Load the car-evaluation data set. The CSV must have a header row with the
# columns: buying, maint, door, persons, log_boot, safety, class.
data = pd.read_csv('car.data')
print(data.head())

# Transform the text categories into integer codes, one column at a time.
# A row such as
#   vhigh vhigh 2 2 small low unacc
# becomes one small integer per column. LabelEncoder numbers the distinct
# values of a column in sorted order, so a code is only meaningful together
# with the classes_ of the encoder state that produced it.
le = preprocessing.LabelEncoder()
buying = le.fit_transform(list(data['buying']))
maint = le.fit_transform(list(data['maint']))
door = le.fit_transform(list(data['door']))
persons = le.fit_transform(list(data['persons']))
log_boot = le.fit_transform(list(data['log_boot']))
safety = le.fit_transform(list(data['safety']))

# Encode the target column last and capture its label names immediately:
# names[i] is the original class string for integer code i. A hand-written
# list here would silently mislabel every prediction if its order differed
# from the encoder's sorted order.
predict = 'class'
cls = le.fit_transform(list(data[predict]))
names = list(le.classes_)
print(buying)

# Feature rows are 6-tuples of encoded values; y holds the encoded class.
x = list(zip(buying, maint, door, persons, log_boot, safety))
y = list(cls)

# Hold out 10% of the rows for evaluation (split is random on every run).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)

# Classify each sample by majority vote among its 9 nearest neighbours.
model = KNeighborsClassifier(n_neighbors=9)
model.fit(x_train, y_train)
accuracy = model.score(x_test, y_test)
print(accuracy)

# Show every held-out sample next to its predicted and actual class label.
predicted = model.predict(x_test)
for features, guess, actual in zip(x_test, predicted, y_test):
    print('predicted: ', names[guess], 'data: ', features, 'Actual: ', names[actual])
//logs
buying maint door persons log_boot safety class
0 vhigh vhigh 2 2 small low unacc
1 vhigh vhigh 2 2 small med unacc
2 vhigh vhigh 2 2 small high unacc
3 vhigh vhigh 2 2 med low unacc
4 vhigh vhigh 2 2 med med unacc
[3 3 3 ... 1 1 1]
0.9017341040462428
predicted: good data: (0, 2, 2, 1, 2, 1) Actual: good
predicted: vgood data: (1, 1, 3, 2, 0, 0) Actual: vgood
predicted: good data: (0, 3, 0, 1, 1, 0) Actual: good
predicted: good data: (2, 1, 3, 0, 1, 2) Actual: good
predicted: unacc data: (1, 0, 2, 1, 1, 2) Actual: unacc
predicted: unacc data: (2, 1, 1, 1, 2, 0) Actual: acc
predicted: good data: (1, 3, 3, 2, 1, 1) Actual: good
predicted: unacc data: (0, 1, 2, 2, 0, 0) Actual: unacc
predicted: vgood data: (2, 1, 3, 2, 1, 0) Actual: vgood
predicted: good data: (3, 3, 0, 1, 0, 0) Actual: good
predicted: good data: (0, 2, 1, 1, 2, 2) Actual: good
predicted: good data: (3, 0, 0, 0, 0, 1) Actual: good
predicted: unacc data: (0, 2, 1, 1, 0, 0) Actual: unacc
predicted: acc data: (1, 1, 1, 1, 0, 2) Actual: acc
predicted: unacc data: (3, 1, 2, 2, 2, 0) Actual: unacc
predicted: unacc data: (2, 3, 2, 2, 2, 0) Actual: unacc
predicted: good data: (0, 1, 0, 1, 2, 2) Actual: good
predicted: good data: (3, 3, 1, 1, 0, 2) Actual: good
predicted: good data: (3, 0, 1, 0, 0, 1) Actual: good
predicted: unacc data: (2, 2, 1, 1, 1, 0) Actual: unacc
predicted: good data: (1, 1, 0, 0, 1, 0) Actual: good
predicted: unacc data: (1, 0, 2, 1, 2, 0) Actual: unacc
predicted: unacc data: (2, 2, 0, 2, 1, 2) Actual: unacc
predicted: good data: (2, 1, 0, 1, 2, 0) Actual: acc
predicted: good data: (2, 1, 1, 1, 0, 1) Actual: good
predicted: unacc data: (2, 1, 3, 2, 0, 0) Actual: vgood
predicted: good data: (0, 3, 2, 1, 1, 2) Actual: good
predicted: vgood data: (2, 1, 0, 1, 0, 0) Actual: vgood
predicted: good data: (1, 0, 3, 0, 0, 2) Actual: good
predicted: good data: (1, 1, 3, 0, 0, 0) Actual: good
predicted: good data: (3, 0, 1, 1, 2, 2) Actual: good
predicted: good data: (0, 1, 0, 0, 1, 0) Actual: good
predicted: vgood data: (1, 1, 2, 1, 0, 0) Actual: vgood
predicted: unacc data: (2, 3, 2, 2, 1, 2) Actual: unacc
predicted: good data: (0, 0, 0, 0, 2, 1) Actual: good
predicted: good data: (0, 2, 0, 0, 0, 1) Actual: good
predicted: unacc data: (2, 2, 1, 2, 2, 2) Actual: unacc
predicted: good data: (2, 3, 2, 0, 2, 0) Actual: good
predicted: good data: (1, 1, 0, 1, 1, 1) Actual: good
predicted: good data: (2, 0, 3, 1, 2, 0) Actual: unacc
predicted: good data: (0, 3, 1, 0, 2, 0) Actual: good
predicted: acc data: (1, 2, 3, 1, 1, 2) Actual: acc
predicted: good data: (3, 1, 0, 0, 0, 0) Actual: good
predicted: unacc data: (3, 2, 3, 1, 0, 2) Actual: unacc
predicted: good data: (0, 3, 0, 1, 2, 2) Actual: good
predicted: good data: (0, 1, 0, 2, 1, 1) Actual: good
predicted: good data: (3, 3, 1, 0, 2, 2) Actual: good
predicted: good data: (1, 3, 3, 1, 0, 1) Actual: good
predicted: good data: (2, 2, 0, 0, 2, 1) Actual: good
predicted: good data: (3, 3, 3, 0, 0, 2) Actual: good
predicted: good data: (3, 2, 3, 0, 2, 1) Actual: good
predicted: good data: (1, 3, 3, 0, 2, 1) Actual: good
predicted: unacc data: (3, 3, 3, 2, 0, 0) Actual: good
predicted: good data: (3, 3, 1, 1, 2, 0) Actual: good
predicted: good data: (0, 0, 1, 1, 2, 1) Actual: good
predicted: good data: (3, 1, 2, 1, 0, 0) Actual: unacc
predicted: unacc data: (3, 1, 2, 1, 1, 0) Actual: unacc
predicted: unacc data: (3, 2, 1, 1, 2, 0) Actual: unacc
predicted: good data: (1, 3, 1, 0, 1, 2) Actual: good
predicted: unacc data: (1, 1, 0, 1, 0, 2) Actual: acc
predicted: unacc data: (0, 1, 3, 2, 0, 2) Actual: unacc
predicted: good data: (2, 0, 0, 1, 1, 2) Actual: good
predicted: good data: (0, 0, 0, 0, 0, 0) Actual: good
predicted: good data: (3, 3, 3, 1, 0, 1) Actual: good
predicted: unacc data: (0, 2, 1, 2, 0, 2) Actual: unacc
predicted: good data: (2, 0, 2, 2, 2, 1) Actual: good
predicted: good data: (1, 2, 2, 0, 1, 0) Actual: good
predicted: good data: (0, 3, 1, 2, 1, 2) Actual: good
predicted: good data: (2, 2, 2, 0, 0, 0) Actual: good
predicted: good data: (0, 0, 2, 1, 1, 2) Actual: unacc
predicted: good data: (0, 2, 3, 1, 2, 2) Actual: good
predicted: good data: (2, 1, 2, 1, 2, 1) Actual: good
predicted: good data: (3, 0, 2, 0, 0, 2) Actual: good
predicted: good data: (1, 1, 2, 0, 2, 2) Actual: good
predicted: good data: (3, 1, 1, 0, 0, 0) Actual: good
predicted: unacc data: (1, 3, 2, 2, 0, 2) Actual: unacc
predicted: good data: (0, 3, 2, 0, 0, 1) Actual: good
predicted: unacc data: (0, 0, 0, 2, 0, 2) Actual: unacc
predicted: good data: (3, 2, 0, 0, 2, 2) Actual: good
predicted: unacc data: (2, 0, 3, 2, 2, 0) Actual: unacc
predicted: unacc data: (2, 3, 0, 2, 1, 0) Actual: unacc
predicted: good data: (2, 0, 2, 1, 1, 1) Actual: good
predicted: good data: (0, 1, 3, 1, 2, 2) Actual: good
predicted: unacc data: (1, 3, 3, 2, 1, 2) Actual: unacc
predicted: good data: (2, 3, 1, 1, 1, 2) Actual: good
predicted: good data: (3, 2, 2, 0, 1, 0) Actual: good
predicted: good data: (2, 2, 2, 0, 1, 2) Actual: good
predicted: unacc data: (1, 3, 0, 2, 0, 0) Actual: unacc
predicted: good data: (3, 0, 1, 1, 2, 1) Actual: good
predicted: unacc data: (2, 2, 0, 1, 2, 0) Actual: unacc
predicted: good data: (3, 1, 1, 2, 2, 0) Actual: unacc
predicted: good data: (3, 1, 2, 2, 0, 0) Actual: unacc
predicted: good data: (1, 1, 0, 1, 2, 1) Actual: good
predicted: unacc data: (0, 1, 0, 1, 1, 0) Actual: unacc
predicted: good data: (2, 1, 3, 0, 1, 0) Actual: good
predicted: unacc data: (3, 2, 1, 2, 0, 2) Actual: unacc
predicted: good data: (1, 1, 0, 0, 2, 1) Actual: good
predicted: good data: (0, 0, 1, 0, 2, 0) Actual: good
predicted: good data: (1, 0, 3, 2, 2, 1) Actual: good
predicted: good data: (3, 1, 1, 1, 1, 2) Actual: good
predicted: good data: (3, 0, 2, 1, 2, 1) Actual: good
predicted: acc data: (1, 1, 2, 1, 2, 0) Actual: acc
predicted: good data: (0, 2, 2, 0, 0, 0) Actual: good
predicted: good data: (0, 1, 3, 0, 1, 2) Actual: good
predicted: good data: (0, 1, 0, 2, 1, 2) Actual: good
predicted: good data: (2, 0, 2, 1, 2, 0) Actual: unacc
predicted: good data: (0, 3, 3, 2, 2, 2) Actual: good
predicted: good data: (1, 1, 3, 1, 2, 1) Actual: good
predicted: good data: (3, 0, 2, 2, 2, 0) Actual: good
predicted: good data: (0, 0, 3, 2, 2, 2) Actual: good
predicted: unacc data: (2, 3, 3, 2, 2, 0) Actual: unacc
predicted: good data: (2, 3, 2, 1, 1, 2) Actual: unacc
predicted: good data: (3, 1, 3, 0, 2, 2) Actual: good
predicted: good data: (2, 1, 1, 2, 2, 0) Actual: acc
predicted: good data: (3, 3, 2, 1, 0, 0) Actual: good
predicted: good data: (0, 0, 0, 2, 0, 1) Actual: good
predicted: unacc data: (0, 1, 3, 1, 2, 0) Actual: unacc
predicted: good data: (0, 1, 3, 0, 2, 2) Actual: good
predicted: good data: (3, 1, 0, 0, 1, 1) Actual: good
predicted: good data: (3, 0, 2, 2, 0, 0) Actual: good
predicted: good data: (2, 2, 3, 2, 0, 1) Actual: good
predicted: good data: (0, 2, 2, 1, 2, 2) Actual: good
predicted: good data: (0, 2, 2, 0, 0, 1) Actual: good
predicted: good data: (2, 2, 1, 0, 0, 0) Actual: good
predicted: unacc data: (0, 2, 2, 1, 2, 0) Actual: unacc
predicted: good data: (0, 3, 1, 0, 1, 1) Actual: good
predicted: vgood data: (2, 1, 2, 2, 0, 0) Actual: vgood
predicted: good data: (2, 0, 1, 2, 2, 0) Actual: unacc
predicted: good data: (1, 1, 1, 0, 0, 1) Actual: good
predicted: good data: (3, 0, 0, 2, 2, 2) Actual: good
predicted: good data: (1, 3, 0, 0, 0, 2) Actual: good
predicted: good data: (3, 0, 0, 0, 2, 1) Actual: good
predicted: good data: (0, 3, 0, 2, 0, 0) Actual: good
predicted: unacc data: (0, 0, 1, 2, 0, 2) Actual: unacc
predicted: good data: (2, 0, 3, 0, 1, 0) Actual: good
predicted: good data: (1, 0, 1, 1, 0, 1) Actual: good
predicted: good data: (1, 2, 0, 2, 2, 2) Actual: good
predicted: good data: (0, 2, 0, 1, 1, 2) Actual: good
predicted: unacc data: (0, 0, 2, 1, 1, 0) Actual: unacc
predicted: good data: (1, 0, 2, 0, 1, 2) Actual: good
predicted: good data: (3, 3, 0, 1, 1, 2) Actual: good
predicted: unacc data: (2, 3, 1, 2, 1, 2) Actual: unacc
predicted: vgood data: (1, 2, 1, 1, 0, 0) Actual: vgood
predicted: unacc data: (1, 3, 2, 1, 2, 0) Actual: unacc
predicted: good data: (1, 1, 2, 0, 0, 1) Actual: good
predicted: good data: (2, 2, 1, 1, 1, 1) Actual: good
predicted: good data: (2, 1, 1, 2, 0, 1) Actual: good
predicted: unacc data: (0, 0, 3, 1, 0, 2) Actual: unacc
predicted: good data: (3, 2, 1, 0, 2, 0) Actual: good
predicted: good data: (3, 0, 3, 1, 1, 1) Actual: good
predicted: good data: (3, 2, 0, 1, 1, 1) Actual: good
predicted: good data: (0, 2, 1, 2, 2, 1) Actual: good
predicted: unacc data: (0, 1, 1, 2, 0, 0) Actual: unacc
predicted: unacc data: (0, 1, 1, 2, 1, 2) Actual: unacc
predicted: good data: (1, 3, 0, 2, 2, 0) Actual: good
predicted: good data: (1, 3, 2, 0, 1, 2) Actual: good
predicted: unacc data: (2, 2, 1, 1, 1, 2) Actual: unacc
predicted: good data: (3, 1, 2, 0, 0, 1) Actual: good
predicted: unacc data: (3, 2, 3, 2, 1, 2) Actual: unacc
predicted: good data: (3, 0, 3, 0, 0, 2) Actual: good
predicted: good data: (3, 3, 3, 2, 1, 0) Actual: good
predicted: good data: (3, 0, 2, 1, 0, 2) Actual: good
predicted: good data: (3, 0, 3, 2, 1, 2) Actual: good
predicted: good data: (3, 0, 2, 2, 1, 2) Actual: good
predicted: good data: (2, 3, 0, 0, 1, 0) Actual: good
predicted: good data: (2, 3, 3, 1, 1, 2) Actual: unacc
predicted: good data: (3, 1, 3, 0, 2, 1) Actual: good
predicted: good data: (3, 2, 3, 2, 2, 2) Actual: good
predicted: unacc data: (1, 2, 3, 1, 2, 0) Actual: acc
predicted: good data: (1, 3, 1, 2, 1, 1) Actual: good
predicted: unacc data: (0, 1, 0, 2, 0, 0) Actual: unacc
predicted: good data: (3, 1, 1, 2, 1, 2) Actual: unacc
predicted: good data: (0, 1, 1, 0, 2, 1) Actual: good
reference:
https://www.youtube.com/watch?v=ddqQUz9mZaM
No comments:
Post a Comment