Friday 1 January 2021

keras 6 predict wine quality

predict the quality of white wine from 11 physicochemical measurements. the quality score will be grouped into three categories: poor, common, good.

#wine.py
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
from tensorflow.keras.utils import to_categorical
import numpy as np
import pandas as pd
import itertools
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

#note: the csv straight from the UCI repository is semicolon-delimited; pass sep=';' if using that file unmodified
df = pd.read_csv('data/winequality-white.csv')
"""
with pd.option_context( 'display.max_columns', None, 'display.width', 1000):
    print(df.head())
"""
dataset = df.values

samples = dataset[:, 0:11]  #11 physicochemical features
labels = dataset[:, 11]     #quality score

#group quality scores into 3 classes: <6 -> 0 (poor), 6 stays 6 (common), >6 -> 10 (good)
labels[labels < 6] = 0
labels[labels > 6] = 10
samples, labels = shuffle(samples, labels)
#print(labels)
#[ 0.  0. 10. ...  6.  0. 10.]

#encode the three category values (0, 6, 10) as indices 0..2, then one-hot
encoder = LabelEncoder()
encoder.fit(labels)
encoded_labels = encoder.transform(labels)
one_hot_labels = to_categorical(encoded_labels)
labels_index = np.argmax(one_hot_labels, axis=-1)  #integer class index, identical to encoded_labels
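#aside (not part of the original run): the sparse_categorical_crossentropy loss
#used below trains directly on the integer labels_index, so one_hot_labels would
#only be needed if the loss were switched to categorical_crossentropy.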


with np.printoptions(threshold=np.inf):
    print(one_hot_labels[0:5])
    print(labels_index[0:10])
"""
one_hot_labels
[[1. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]]

labels_index
[0 0 2 2 1 0 1 2 0 1]
"""
#scale inputs to the range [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_samples = scaler.fit_transform(samples)
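#equivalent per-column formula (a sketch of what MinMaxScaler computes here):
#scaled = (x - x.min(axis=0)) / (x.max(axis=0) - x.min(axis=0))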

#split dataset 70% training, 30% testing
train_samples, test_samples, train_labels, test_labels = train_test_split(scaled_samples, labels_index, test_size=0.3)
#print(train_samples.shape, train_labels.shape, test_labels.shape, test_labels.shape)
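#aside: the classes are imbalanced, so passing stratify=labels_index to
#train_test_split (a standard sklearn option, not used in the original run)
#would keep the train/test class proportions equal.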

#train on GPU (skip the memory-growth tweak when no GPU is present)
physical_devices = tf.config.experimental.list_physical_devices('GPU')
#print("Num GPUs Available: ", len(physical_devices))
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

#create model
model = Sequential([
    Dense(units=128, input_shape=(11,), activation='relu',
          activity_regularizer=regularizers.l2(0.1)),
    Dropout(0.2),
    Dense(units=64, activation='relu'),
    Dropout(0.2),
    Dense(units=8, activation='relu'),
    Dropout(0.2),
    Dense(units=3, activation='softmax')
])
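#note: activity_regularizer penalizes the first layer's outputs; to penalize its
#weights instead (the more common choice), use kernel_regularizer=regularizers.l2(0.1).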
model.summary()
#training & validation
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x=train_samples, y=train_labels, validation_split=0.2, batch_size=32, epochs=100, shuffle=True, verbose=2)
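#optional (a hedged sketch, not part of the original run): stop training when
#val_loss stops improving instead of always running 100 epochs
#from tensorflow.keras.callbacks import EarlyStopping
#early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
#model.fit(x=train_samples, y=train_labels, validation_split=0.2, batch_size=32,
#          epochs=100, shuffle=True, verbose=2, callbacks=[early_stop])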

#prediction
predictions = model.predict(x=test_samples, batch_size=32, verbose=1)
#print(predictions)

#predicted class index (argmax over the softmax probabilities)
rounded_predictions = np.argmax(predictions, axis=-1)
print(rounded_predictions)

# visualize prediction accuracy - confusion matrix
cm = confusion_matrix(y_true=test_labels, y_pred=rounded_predictions)


def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    #normalize before drawing so the heatmap and the cell text agree
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

#quality category
cm_plot_labels = ['poor', 'common', 'good']

plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title='Confusion Matrix')
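#optional check (hedged; its output does not appear in the logs below):
#loss, acc = model.evaluate(x=test_samples, y=test_labels, verbose=0)
#print('overall test accuracy:', acc)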


--------------------------------------
#logs
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense (Dense)                (None, 128)               1536
_________________________________________________________________
dropout (Dropout)            (None, 128)               0
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8256
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0
_________________________________________________________________
dense_2 (Dense)              (None, 8)                 520
_________________________________________________________________
dropout_2 (Dropout)          (None, 8)                 0
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 27
=================================================================
Total params: 10,339
Trainable params: 10,339
Non-trainable params: 0
_________________________________________________________________
2021-01-01 21:23:44.355260: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
Epoch 1/100
2021-01-01 21:23:44.777056: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublas64_11.dll
2021-01-01 21:23:44.975699: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublasLt64_11.dll
86/86 - 1s - loss: 1.0967 - accuracy: 0.4373 - val_loss: 1.0510 - val_accuracy: 0.4548
Epoch 2/100
86/86 - 0s - loss: 1.0112 - accuracy: 0.4635 - val_loss: 0.9694 - val_accuracy: 0.5569
Epoch 3/100
86/86 - 0s - loss: 0.9598 - accuracy: 0.5160 - val_loss: 0.9347 - val_accuracy: 0.5816
Epoch 4/100
86/86 - 0s - loss: 0.9258 - accuracy: 0.5354 - val_loss: 0.9288 - val_accuracy: 0.5656
Epoch 5/100
86/86 - 0s - loss: 0.9192 - accuracy: 0.5500 - val_loss: 0.9198 - val_accuracy: 0.5598
Epoch 6/100
86/86 - 0s - loss: 0.8993 - accuracy: 0.5569 - val_loss: 0.9004 - val_accuracy: 0.5860
...
Epoch 97/100
86/86 - 0s - loss: 0.7654 - accuracy: 0.6269 - val_loss: 0.8600 - val_accuracy: 0.6064
Epoch 98/100
86/86 - 0s - loss: 0.7719 - accuracy: 0.6109 - val_loss: 0.8523 - val_accuracy: 0.6152
Epoch 99/100
86/86 - 0s - loss: 0.7643 - accuracy: 0.6233 - val_loss: 0.8431 - val_accuracy: 0.6137
Epoch 100/100
86/86 - 0s - loss: 0.7595 - accuracy: 0.6145 - val_loss: 0.8404 - val_accuracy: 0.6064
46/46 [==============================] - 0s 573us/step
[2 1 2 ... 2 1 2]
Confusion matrix, without normalization
[[319 154  11]
 [165 395 119]
 [ 12 130 165]]
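reading the matrix (rows = true labels, columns = predicted): the diagonal gives (319 + 395 + 165) / 1470 ≈ 0.60 overall test accuracy, consistent with the final val_accuracy in the training log.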
