Thursday 31 December 2020

keras 4 predict if housing price is above median price 1

The goal is to predict whether a house's value is above or below the median price, based on 10 types of measurements.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras import regularizers
import numpy as np
from random import randint
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix
import itertools
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the raw table. Per the slicing below, columns 0-9 are used as the ten
# input features and column 10 as the class label.
df = pd.read_csv('data/housepricedata.csv')

dataset = df.values

# Only the first 1000 rows are used.
samples = dataset[0:1000, 0:10]
labels = dataset[0:1000, 10]
samples, labels = shuffle(samples, labels)

#split dataset 70% training, 30% testing
# The split happens BEFORE scaling so the scaler never sees the test rows.
raw_train_samples, raw_test_samples, train_labels, test_labels = train_test_split(samples, labels, test_size=0.3)

#scale input between 0, 1
# Fit the min/max on the training rows only, then reuse those statistics for
# the test rows. Fitting on the full dataset would leak test-set information
# into preprocessing. Test features may therefore land slightly outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_train_samples = scaler.fit_transform(raw_train_samples)
train_samples = scaled_train_samples
test_samples = scaler.transform(raw_test_samples)
print(train_samples.shape, train_labels.shape, test_samples.shape, test_labels.shape)

#train on GPU
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up
# front. The device list is empty on a CPU-only machine, so iterate over it
# rather than indexing [0]; the script then simply runs on the CPU.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
#print("Num GPUs Available: ", len(physical_devices))
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

#create model
# Three 32-unit ReLU hidden layers over the 10 input features, ending in a
# 2-unit softmax layer (one output per class).
model = Sequential([
    Dense(units=32, input_shape=(10,), activation='relu'),
    Dense(units=32, activation='relu'),
    Dense(units=32, activation='relu'),
    Dense(units=2, activation='softmax')
])
model.summary()

#training & validation
# The sparse loss takes class indices as targets (not one-hot vectors), which
# matches the single label column used here.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# validation_split=0.1 sets aside 10% of the training rows for per-epoch
# validation; those rows are not used for weight updates.
model.fit(x=train_samples, y=train_labels, validation_split=0.1, batch_size=32, epochs=200, shuffle=True, verbose=2)

#save model
# NOTE: the 'models/' directory is assumed to exist already.
model.save('models/house.h5')

# Run the trained network on the held-out samples; each output row holds one
# score per class.
predictions = model.predict(test_samples, batch_size=32, verbose=1)

# Collapse each row of class scores to the index of its largest entry.
rounded_predictions = predictions.argmax(axis=-1)

# Tabulate true labels against predicted class indices for plotting.
cm = confusion_matrix(test_labels, rounded_predictions)

def plot_confusion_matrix(cm, classes,
                        normalize=False,
                        title='Confusion matrix',
                        cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.

    Args:
        cm: 2-D NumPy array of counts, indexed as cm[true, predicted].
        classes: Sequence of tick labels, one per class, in index order.
        normalize: If True, divide each row by its sum before printing and
            annotating the cells.
        title: Title drawn above the plot.
        cmap: Matplotlib colormap used to draw the matrix.

    Returns:
        None. Draws on the current matplotlib figure and prints the matrix.
    """
    # The image is drawn from the matrix as passed in; normalization below
    # only affects the printed matrix and the numbers written in the cells.
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        # Divide every row by its total so each row sums to 1.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    # Cells above half the maximum get white text, the rest black, so the
    # numbers stay readable against both dark and light cells.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

# Tick labels in class-index order (presumably class 0 = below, class 1 =
# above; confirm against the label column of the dataset).
cm_plot_labels = ['below avg', 'above_avg']

# Render the confusion matrix computed for the test predictions.
plot_confusion_matrix(
    cm=cm,
    classes=cm_plot_labels,
    title='Confusion Matrix',
)

Model: "sequential"
Layer (type)                 Output Shape              Param #
dense (Dense)                (None, 32)                352
dense_1 (Dense)              (None, 32)                1056
dense_2 (Dense)              (None, 32)                1056
dense_3 (Dense)              (None, 2)                 66
Total params: 2,530
Trainable params: 2,530
Non-trainable params: 0
2020-12-31 16:26:55.838065: I tensorflow/compiler/mlir/] None of the MLIR optimization passes are enabled (registered 2)
Epoch 1/200
2020-12-31 16:26:56.160440: I tensorflow/stream_executor/platform/default/] Successfully opened dynamic library cublas64_11.dll
2020-12-31 16:26:56.347504: I tensorflow/stream_executor/platform/default/] Successfully opened dynamic library cublasLt64_11.dll
20/20 - 1s - loss: 0.6870 - accuracy: 0.5032 - val_loss: 0.6569 - val_accuracy: 0.5857
Epoch 2/200
20/20 - 0s - loss: 0.6803 - accuracy: 0.5032 - val_loss: 0.6507 - val_accuracy: 0.5857
Epoch 3/200
20/20 - 0s - loss: 0.6741 - accuracy: 0.5032 - val_loss: 0.6449 - val_accuracy: 0.5857
Epoch 4/200
20/20 - 0s - loss: 0.6686 - accuracy: 0.5032 - val_loss: 0.6394 - val_accuracy: 0.5857
Epoch 5/200
20/20 - 0s - loss: 0.6636 - accuracy: 0.5032 - val_loss: 0.6352 - val_accuracy: 0.5857
Epoch 198/200
20/20 - 0s - loss: 0.2478 - accuracy: 0.9048 - val_loss: 0.2351 - val_accuracy: 0.8857
Epoch 199/200
20/20 - 0s - loss: 0.2472 - accuracy: 0.9048 - val_loss: 0.2351 - val_accuracy: 0.8857
Epoch 200/200
20/20 - 0s - loss: 0.2467 - accuracy: 0.9048 - val_loss: 0.2313 - val_accuracy: 0.9000
10/10 [==============================] - 0s 665us/step
Confusion matrix, without normalization
[[143  13]
 [ 17 127]]

