Thursday 31 December 2020

keras 4 predict if housing price is above median price 1

The goal is to predict whether a house's value is above or below the median price, based on 10 types of measurements.

#house.py
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras import regularizers
import numpy as np
from random import randint
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix
import itertools
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv('data/housepricedata.csv')
#print(df.head())

dataset = df.values
#print(dataset)

#first 1000 rows only: columns 0-9 are the input features, column 10 is the label
samples = dataset[0:1000, 0:10]
labels = dataset[0:1000, 10]
samples, labels = shuffle(samples, labels)

#split dataset 70% training, 30% testing
#the split happens BEFORE scaling so the test rows cannot influence the scaler
train_samples, test_samples, train_labels, test_labels = train_test_split(samples, labels, test_size=0.3)

#scale input between 0, 1
#fit the min/max on the training rows only, then apply the same transform to the
#test rows (test values may fall slightly outside [0, 1]; that is expected)
scaler = MinMaxScaler(feature_range=(0, 1))
train_samples = scaler.fit_transform(train_samples)
test_samples = scaler.transform(test_samples)
print(train_samples.shape, train_labels.shape, test_samples.shape, test_labels.shape)

#train on GPU when one is available; with no GPU the list is empty and
#TensorFlow simply runs on the CPU, so only touch device 0 if it exists
physical_devices = tf.config.experimental.list_physical_devices('GPU')
#print("Num GPUs Available: ", len(physical_devices))
if physical_devices:
    #enable memory growth on the first GPU (must be set before the GPU is first used)
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

#create model: 10 inputs -> three hidden ReLU layers of 32 units -> 2-way softmax
model = Sequential()
model.add(Dense(units=32, input_shape=(10,), activation='relu'))
model.add(Dense(units=32, activation='relu'))
model.add(Dense(units=32, activation='relu'))
model.add(Dense(units=2, activation='softmax'))
model.summary()

#training & validation
#sparse categorical cross-entropy takes the labels as class indices (not one-hot)
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
#10% of the training data is held out for validation during fitting
model.fit(
    x=train_samples,
    y=train_labels,
    validation_split=0.1,
    batch_size=32,
    epochs=200,
    shuffle=True,
    verbose=2,
)

#save model
#model.save('models/house.h5')

#prediction: one row of two softmax outputs per test sample
predictions = model.predict(test_samples, batch_size=32, verbose=1)

#predicted output index = position of the larger of the two outputs in each row
rounded_predictions = predictions.argmax(axis=-1)

#visualize prediction accuracy - confusion matrix of true vs. predicted labels
cm = confusion_matrix(test_labels, rounded_predictions)

def plot_confusion_matrix(cm, classes,
                        normalize=False,
                        title='Confusion matrix',
                        cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix.

    Parameters:
        cm        -- 2-D array of counts; rows are drawn along the 'True label'
                     axis and columns along the 'Predicted label' axis.
        classes   -- sequence of tick labels, one per class.
        normalize -- when True, every row is divided by its row total, so each
                     cell becomes the fraction of that true class.
        title     -- title of the figure.
        cmap      -- matplotlib colormap used for the cells.

    The matrix (normalized or not) is printed to stdout and then shown in a
    blocking matplotlib window. Nothing is returned.
    """
    cm = np.asarray(cm)

    #normalize BEFORE drawing so the colours and the cell text show the same values
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        #a class with no true samples has a zero row total; divide by 1 there
        #so the row stays all zeros instead of turning into NaN
        cm = cm.astype('float') / np.where(row_totals == 0, 1, row_totals)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    #cells darker than half the maximum get white text so the number stays readable
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        #two decimals for fractions; raw counts are passed through unchanged
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

#tick labels for the two classes
#presumably label 0 = below and label 1 = above - confirm against the CSV
cm_plot_labels = ['below avg', 'above_avg']

#draw the raw-count (non-normalized) confusion matrix for the test set
plot_confusion_matrix(cm, cm_plot_labels, title='Confusion Matrix')

----------------------------------------
#logs
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense (Dense)                (None, 32)                352
_________________________________________________________________
dense_1 (Dense)              (None, 32)                1056
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 66
=================================================================
Total params: 2,530
Trainable params: 2,530
Non-trainable params: 0
_________________________________________________________________
2020-12-31 16:26:55.838065: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
Epoch 1/200
2020-12-31 16:26:56.160440: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublas64_11.dll
2020-12-31 16:26:56.347504: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublasLt64_11.dll
20/20 - 1s - loss: 0.6870 - accuracy: 0.5032 - val_loss: 0.6569 - val_accuracy: 0.5857
Epoch 2/200
20/20 - 0s - loss: 0.6803 - accuracy: 0.5032 - val_loss: 0.6507 - val_accuracy: 0.5857
Epoch 3/200
20/20 - 0s - loss: 0.6741 - accuracy: 0.5032 - val_loss: 0.6449 - val_accuracy: 0.5857
Epoch 4/200
20/20 - 0s - loss: 0.6686 - accuracy: 0.5032 - val_loss: 0.6394 - val_accuracy: 0.5857
Epoch 5/200
20/20 - 0s - loss: 0.6636 - accuracy: 0.5032 - val_loss: 0.6352 - val_accuracy: 0.5857
...
Epoch 198/200
20/20 - 0s - loss: 0.2478 - accuracy: 0.9048 - val_loss: 0.2351 - val_accuracy: 0.8857
Epoch 199/200
20/20 - 0s - loss: 0.2472 - accuracy: 0.9048 - val_loss: 0.2351 - val_accuracy: 0.8857
Epoch 200/200
20/20 - 0s - loss: 0.2467 - accuracy: 0.9048 - val_loss: 0.2313 - val_accuracy: 0.9000
10/10 [==============================] - 0s 665us/step
Confusion matrix, without normalization
[[143  13]
 [ 17 127]]

No comments:

Post a Comment