Friday 1 January 2021

keras 6 predict wine quality

Predict the quality of white wine from 11 measured properties. Quality is divided into three classes: poor, common, and good.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras import regularizers
import numpy as np
from random import randint
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import confusion_matrix
import itertools
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# load the white-wine quality table
# NOTE(review): the UCI distribution of this file is ';'-separated; if yours is,
# pass sep=';' to read_csv — confirm against the local copy
df = pd.read_csv('data/winequality-white.csv')

# preview the data with every column visible; option_context restores the
# pandas display options when the block exits
# NOTE(review): the statement under this `with` was lost from the post; a short
# preview is assumed — TODO confirm
with pd.option_context('display.max_columns', None, 'display.width', 1000):
    print(df.head())

dataset = df.values

# columns 0-10 are the input features, column 11 is the quality label
samples = dataset[:, 0:11]
labels = dataset[:, 11]

#divide wine into 3 qualities - poor (0), common (6), good (10)
# scores below 6 collapse to 0, scores above 6 collapse to 10, 6 stays as is
labels[labels < 6] = 0
labels[labels > 6] = 10
# shuffle features and labels together so rows stay aligned
samples, labels = shuffle(samples, labels)
#[ 0.  0. 10. ...  6.  0. 10.]

#encode output categories
encoder = LabelEncoder()
# a fresh LabelEncoder has to learn the classes before it can transform them;
# calling transform() alone raises NotFittedError
encoded_labels = encoder.fit_transform(labels)
one_hot_labels = to_categorical(encoded_labels)
# class index per row, recovered from the one-hot matrix
labels_index = np.argmax(one_hot_labels, axis=-1)

# show a small sample of the encoded labels
# NOTE(review): the body of this `with` was lost from the post; the slices below
# are chosen to match the sample output that follows (five one-hot rows, ten
# class indices) — TODO confirm
with np.printoptions(threshold=np.inf):
    print(one_hot_labels[:5])
    print(labels_index[:10])
[[1. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]]

[0 0 2 2 1 0 1 2 0 1]
#split dataset 70% training, 30% testing
# split the raw samples first so the scaler below never sees the test rows
train_samples, test_samples, train_labels, test_labels = train_test_split(
    samples, labels_index, test_size=0.3)

#scale input between 0, 1
# the scaler is fitted on the training rows only; the test rows reuse that
# mapping, so a test value outside the training range can land slightly
# outside [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1))
train_samples = scaler.fit_transform(train_samples)
test_samples = scaler.transform(test_samples)
# whole dataset under the same scaling, kept under its original name
scaled_train_samples = scaler.transform(samples)
#print(train_samples.shape, test_samples.shape, train_labels.shape, test_labels.shape)

#train on GPU
pysical_devices = tf.config.experimental.list_physical_devices('GPU')
#print("Num GPUs Available: ", len(pysical_devices))
# enable memory growth on every visible GPU; when no GPU is present the list is
# empty and the loop is skipped, instead of failing with IndexError on [0]
for gpu in pysical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

#create model
# 11 input features -> 128 -> 64 -> 8 -> 3-way softmax (one output per quality class)
# NOTE(review): the logged model summary lists a Dropout layer after each hidden
# Dense layer, but the original rates were lost from the post; 0.2 is a
# placeholder — TODO confirm
model = Sequential([
    Dense(units=128, input_shape=(11,), activation='relu'),
    Dropout(0.2),
    Dense(units=64, activation='relu'),
    Dropout(0.2),
    Dense(units=8, activation='relu'),
    Dropout(0.2),
    Dense(units=3, activation='softmax'),
])
model.summary()

#training & validation
# sparse_categorical_crossentropy takes integer class indices as targets,
# which is what train_labels holds
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# the last 20% of the training rows are held out for validation each epoch
model.fit(x=train_samples, y=train_labels, validation_split=0.2, batch_size=32, epochs=100, shuffle=True, verbose=2)

# run the trained network over the test samples
predictions = model.predict(test_samples, batch_size=32, verbose=1)

# index of the highest-scoring output for each row
rounded_predictions = predictions.argmax(axis=-1)

# confusion matrix of true vs. predicted class indices
cm = confusion_matrix(test_labels, rounded_predictions)

def plot_confusion_matrix(cm, classes,
                          title='Confusion matrix',
                          normalize=False,
                          cmap=plt.cm.Blues):
    """Print the confusion matrix and draw it on the current matplotlib figure.

    Args:
        cm: 2-D confusion matrix; it is indexed as cm[i, j] and summed along
            axis 1 when normalizing.
        classes: tick labels, one per class, used on both axes.
        title: title drawn above the plot.
        normalize: when true, each row is divided by its row sum before
            printing and annotating.
        cmap: matplotlib colormap passed to ``plt.imshow``.

    The image itself is drawn from the matrix as passed in; normalization only
    affects the printed matrix and the per-cell text.
    """
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    # cells darker than half the maximum get white text so the number stays readable
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')

#quality category
# order matches the encoded class indices 0, 1, 2
cm_plot_labels = ['poor', 'common', 'good']

plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title='Confusion Matrix')
# when run as a plain script the figure is only displayed once show() is called
plt.show()

Model: "sequential"
Layer (type)                 Output Shape              Param #
dense (Dense)                (None, 128)               1536
dropout (Dropout)            (None, 128)               0
dense_1 (Dense)              (None, 64)                8256
dropout_1 (Dropout)          (None, 64)                0
dense_2 (Dense)              (None, 8)                 520
dropout_2 (Dropout)          (None, 8)                 0
dense_3 (Dense)              (None, 3)                 27
Total params: 10,339
Trainable params: 10,339
Non-trainable params: 0
2021-01-01 21:23:44.355260: I tensorflow/compiler/mlir/] None of the MLIR optimization passes are enabled (registered 2)
Epoch 1/100
2021-01-01 21:23:44.777056: I tensorflow/stream_executor/platform/default/] Successfully opened dynamic library cublas64_11.dll
2021-01-01 21:23:44.975699: I tensorflow/stream_executor/platform/default/] Successfully opened dynamic library cublasLt64_11.dll
86/86 - 1s - loss: 1.0967 - accuracy: 0.4373 - val_loss: 1.0510 - val_accuracy: 0.4548
Epoch 2/100
86/86 - 0s - loss: 1.0112 - accuracy: 0.4635 - val_loss: 0.9694 - val_accuracy: 0.5569
Epoch 3/100
86/86 - 0s - loss: 0.9598 - accuracy: 0.5160 - val_loss: 0.9347 - val_accuracy: 0.5816
Epoch 4/100
86/86 - 0s - loss: 0.9258 - accuracy: 0.5354 - val_loss: 0.9288 - val_accuracy: 0.5656
Epoch 5/100
86/86 - 0s - loss: 0.9192 - accuracy: 0.5500 - val_loss: 0.9198 - val_accuracy: 0.5598
Epoch 6/100
86/86 - 0s - loss: 0.8993 - accuracy: 0.5569 - val_loss: 0.9004 - val_accuracy: 0.5860
... (epochs 7-96 omitted)
Epoch 97/100
86/86 - 0s - loss: 0.7654 - accuracy: 0.6269 - val_loss: 0.8600 - val_accuracy: 0.6064
Epoch 98/100
86/86 - 0s - loss: 0.7719 - accuracy: 0.6109 - val_loss: 0.8523 - val_accuracy: 0.6152
Epoch 99/100
86/86 - 0s - loss: 0.7643 - accuracy: 0.6233 - val_loss: 0.8431 - val_accuracy: 0.6137
Epoch 100/100
86/86 - 0s - loss: 0.7595 - accuracy: 0.6145 - val_loss: 0.8404 - val_accuracy: 0.6064
46/46 [==============================] - 0s 573us/step
[2 1 2 ... 2 1 2]
Confusion matrix, without normalization
[[319 154  11]
 [165 395 119]
 [ 12 130 165]]

