Predict the quality of white wine from its physicochemical attributes. Quality is binned into three classes: poor, common, and good.
#wine.py
import itertools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.utils import shuffle
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
#note: the original UCI winequality-white.csv is semicolon-delimited - pass sep=';' when reading that file directly
df = pd.read_csv('data/winequality-white.csv')
"""
with pd.option_context( 'display.max_columns', None, 'display.width', 1000):
print(df.head())
"""
dataset = df.values
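#columns 0-10 hold the 11 physicochemical features (acidity, sugars, pH, alcohol, ...); column 11 is the quality score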
samples = dataset[:, 0:11]
labels = dataset[:, 11]
#bin quality into 3 classes - poor (<6 -> 0), common (=6 -> stays 6), good (>6 -> 10)
labels[labels < 6] = 0
labels[labels > 6] = 10
samples, labels = shuffle(samples, labels)
#print(labels)
#[ 0. 0. 10. ... 6. 0. 10.]
#encode output categories
encoder = LabelEncoder()
encoder.fit(labels)
encoded_labels = encoder.transform(labels)
one_hot_labels = to_categorical(encoded_labels)
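#note: the argmax below simply recovers encoded_labels; the one-hot step is kept to illustrate both label formats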
labels_index = np.argmax(one_hot_labels, axis=-1)
with np.printoptions(threshold=np.inf):
    print(one_hot_labels[0:5])
    print(labels_index[0:10])
"""
one_hot_labels
[[1. 0. 0.]
[1. 0. 0.]
[0. 0. 1.]
[0. 0. 1.]
[0. 1. 0.]]
labels_index
[0 0 2 2 1 0 1 2 0 1]
"""
#scale inputs to the [0, 1] range
scaler = MinMaxScaler(feature_range=(0, 1))
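#note: fitting the scaler on the full dataset leaks test-set statistics into training;
#fitting on the training split alone is the stricter approach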
scaled_train_samples = scaler.fit_transform(samples)
#split dataset 70% training, 30% testing
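#optional: stratify=labels_index would keep the class proportions similar in both splits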
train_samples, test_samples, train_labels, test_labels = train_test_split(scaled_train_samples, labels_index, test_size=0.3)
#print(train_samples.shape, train_labels.shape, test_samples.shape, test_labels.shape)
#train on GPU (guard against machines without one)
physical_devices = tf.config.experimental.list_physical_devices('GPU')
#print("Num GPUs Available: ", len(physical_devices))
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
#create model
model = Sequential([
    Dense(units=128, input_shape=(11,), activation='relu',
          activity_regularizer=regularizers.l2(0.1)),
    Dropout(0.2),
    Dense(units=64, activation='relu'),
    Dropout(0.2),
    Dense(units=8, activation='relu'),
    Dropout(0.2),
    Dense(units=3, activation='softmax')
])
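#note: activity_regularizer penalizes the layer's activations; for plain weight decay on the weights
#themselves, kernel_regularizer is the more common choice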
model.summary()
#training & validation
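#sparse_categorical_crossentropy matches the integer class indices in labels_index (no one-hot targets needed)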
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
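#validation_split=0.2 holds out the last 20% of the (already shuffled) training data for validation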
model.fit(x=train_samples, y=train_labels, validation_split=0.2, batch_size=32, epochs=100, shuffle=True, verbose=2)
#prediction
predictions = model.predict(x=test_samples, batch_size=32, verbose=1)
#print(predictions)
#predicted output index
rounded_predictions = np.argmax(predictions, axis=-1)
print(rounded_predictions)
# visualize prediction accuracy - confusion matrix
cm = confusion_matrix(y_true=test_labels, y_pred=rounded_predictions)
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    #normalize before drawing so the heatmap and the cell text agree
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()
#quality category
cm_plot_labels = ['poor', 'common', 'good']
plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title='Confusion Matrix')
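To reuse the trained network without retraining it, Keras can serialize the whole model to disk; a minimal sketch (the path is illustrative):
#save the trained model and reload it later without retraining
from tensorflow.keras.models import load_model
model.save('models/wine_model.h5')
restored_model = load_model('models/wine_model.h5')
#restored_model.predict(...) behaves exactly like model.predict(...)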
--------------------------------------
#logs
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense (Dense)                (None, 128)               1536
_________________________________________________________________
dropout (Dropout)            (None, 128)               0
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8256
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0
_________________________________________________________________
dense_2 (Dense)              (None, 8)                 520
_________________________________________________________________
dropout_2 (Dropout)          (None, 8)                 0
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 27
=================================================================
Total params: 10,339
Trainable params: 10,339
Non-trainable params: 0
_________________________________________________________________
2021-01-01 21:23:44.355260: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
Epoch 1/100
2021-01-01 21:23:44.777056: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublas64_11.dll
2021-01-01 21:23:44.975699: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublasLt64_11.dll
86/86 - 1s - loss: 1.0967 - accuracy: 0.4373 - val_loss: 1.0510 - val_accuracy: 0.4548
Epoch 2/100
86/86 - 0s - loss: 1.0112 - accuracy: 0.4635 - val_loss: 0.9694 - val_accuracy: 0.5569
Epoch 3/100
86/86 - 0s - loss: 0.9598 - accuracy: 0.5160 - val_loss: 0.9347 - val_accuracy: 0.5816
Epoch 4/100
86/86 - 0s - loss: 0.9258 - accuracy: 0.5354 - val_loss: 0.9288 - val_accuracy: 0.5656
Epoch 5/100
86/86 - 0s - loss: 0.9192 - accuracy: 0.5500 - val_loss: 0.9198 - val_accuracy: 0.5598
Epoch 6/100
86/86 - 0s - loss: 0.8993 - accuracy: 0.5569 - val_loss: 0.9004 - val_accuracy: 0.5860
...
Epoch 97/100
86/86 - 0s - loss: 0.7654 - accuracy: 0.6269 - val_loss: 0.8600 - val_accuracy: 0.6064
Epoch 98/100
86/86 - 0s - loss: 0.7719 - accuracy: 0.6109 - val_loss: 0.8523 - val_accuracy: 0.6152
Epoch 99/100
86/86 - 0s - loss: 0.7643 - accuracy: 0.6233 - val_loss: 0.8431 - val_accuracy: 0.6137
Epoch 100/100
86/86 - 0s - loss: 0.7595 - accuracy: 0.6145 - val_loss: 0.8404 - val_accuracy: 0.6064
46/46 [==============================] - 0s 573us/step
[2 1 2 ... 2 1 2]
Confusion matrix, without normalization
[[319 154 11]
[165 395 119]
[ 12 130 165]]
Most predictions land in or next to the correct class; only 23 of the 1,470 test samples cross the poor/good boundary (11 poor wines predicted good, 12 good wines predicted poor).
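The same comparison can be summarized numerically; a short sketch using scikit-learn's classification_report on the variables from the script above:
#per-class precision and recall for the three quality classes
from sklearn.metrics import classification_report
print(classification_report(test_labels, rounded_predictions, target_names=cm_plot_labels))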