Sunday, 24 January 2021

keras 16 RNN predict stock price

#training data from Jan 3 2012 to Dec 30 2016 
            Date    Open    High     Low   Close      Volume
0       1/3/2012  325.25  332.83  324.97  663.59   7,380,500
1       1/4/2012  331.27  333.87  329.08  666.45   5,749,400
2       1/5/2012  329.83  330.75  326.89  657.21   6,590,300
3       1/6/2012  328.34  328.77  323.68  648.24   5,405,900
4       1/9/2012  322.04  322.29  309.46  620.76  11,688,800
...          ...     ...     ...     ...     ...         ...
1253  12/23/2016  790.90  792.74  787.28  789.91     623,400
1254  12/27/2016  790.68  797.86  787.66  791.55     789,100
1255  12/28/2016  793.70  794.23  783.20  785.05   1,153,800
1256  12/29/2016  783.33  785.93  778.92  782.79     744,300
1257  12/30/2016  782.75  782.78  770.41  771.82   1,770,000

#testing data from Jan 3 2017 to Jan 31 2017
         Date    Open    High     Low   Close     Volume
0    1/3/2017  778.81  789.63  775.80  786.14  1,657,300
1    1/4/2017  788.36  791.34  783.16  786.90  1,073,000
2    1/5/2017  786.08  794.48  785.02  794.02  1,335,200
3    1/6/2017  795.26  807.90  792.20  806.15  1,640,200
...
16  1/26/2017  837.81  838.00  827.01  832.15  2,973,900
17  1/27/2017  834.71  841.95  820.44  823.31  2,965,800
18  1/30/2017  814.66  815.84  799.80  802.32  3,246,600
19  1/31/2017  796.86  801.25  790.52  796.79  2,160,600

#goal is to construct a model that uses 5 years of stock prices to forecast the next month's stock price trend

predicted trend follows the real one

#stock.py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.models import load_model
import matplotlib.dates as mdates

# Load the training prices; only the "Open" column is modelled.
csv = pd.read_csv("stock_train.csv")
data = csv["Open"].values

# Normalise prices into [0, 1]. MinMaxScaler expects a 2-D column,
# hence the reshape to (n_rows, 1).
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data.reshape(-1, 1))

# Sliding windows: every sample holds 50 consecutive scaled prices and its
# target is the scaled price of the following (51st) day.
sample_ends = range(50, scaled_data.shape[0])
x = np.array([scaled_data[end - 50:end, 0] for end in sample_ends])
y = np.array([scaled_data[end, 0] for end in sample_ends])

# Add a trailing feature axis so x is (samples, timesteps, 1).
x = x.reshape(x.shape[0], x.shape[1], 1)
# With the 1258-row training file:
#   x.shape == (1208, 50, 1)
#   y.shape == (1208,)

# Train on GPU when one is available.
# list_physical_devices returns an empty list on CPU-only machines, so the
# devices are iterated instead of indexing [0] (which raised IndexError).
# Memory growth is enabled on every GPU because TensorFlow requires the
# setting to be the same across all GPU devices.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
#print("Num GPUs Available: ", len(physical_devices))
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)
# The training code below is wrapped in a bare string literal, so it is NOT
# executed. It builds four stacked 128-unit LSTM layers (each followed by
# Dropout) ending in a single-unit Dense layer, fits them on x / y for
# 100 epochs with MSE loss and the Adam optimizer, and saves the result to
# "stock.h5". Remove the surrounding triple quotes to (re)train; the
# prediction code further down loads "stock.h5", so that file must exist.
"""
model = Sequential()
model.add(LSTM(128, input_shape=(50, 1), return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))

model.add(LSTM(128))
model.add(Dropout(0.1))

model.add(Dense(1))
model.summary()

model.compile(
    loss='mean_squared_error',
    optimizer='adam',
)

model.fit(x,
          y,
          epochs=100,
          batch_size=32)

model.save("stock.h5")
"""
# Load the test month and stack it underneath the training rows.
csv2 = pd.read_csv("stock_test.csv")
print(csv2)
csv3 = pd.concat((csv, csv2), axis=0)

# The model predicts a day from the 50 days before it, so the first test day
# needs the last 50 training days as context: keep those 50 training rows
# plus every test row.
data = csv3.loc[:, "Open"].iloc[len(csv) - 50:].values
#print(data.shape)

# Scale with the scaler already fitted on the training data. Calling
# fit_transform here would refit it on the test window, so the inputs would
# no longer be on the scale the model was trained with and the later
# inverse_transform would map predictions back with the wrong min/max.
scaled_data = scaler.transform(data.reshape(-1, 1))

# One 50-day window per test day.
x_test = np.array(
    [scaled_data[i - 50:i, 0] for i in range(50, scaled_data.shape[0])]
)
# Add a trailing feature axis so x_test is (samples, timesteps, 1).
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

print(x_test.shape)
#(20, 50, 1)

# Load the previously trained network from disk.
model = load_model('stock.h5')

# The model works on scaled prices, so its raw output is scaled as well;
# map it back to real prices with the scaler's inverse transform.
scaled_prediction = model.predict(x_test)
#print(scaled_prediction.shape)
predicted_stock_price = scaler.inverse_transform(scaled_prediction)
#print(predicted_stock_price)

# Plot the real and the predicted opening prices of the test month.
fig, ax = plt.subplots(figsize=(7, 5), dpi=100)

# Parse the test-set dates so matplotlib's date locator/formatter apply.
dates = pd.to_datetime(csv2["Date"].values)
real_open = csv2["Open"].values

ax.plot(dates, real_open, label="Real stock price", color="red")
ax.plot(dates, predicted_stock_price, label="Predicted Stock Price", color="blue")
ax.legend()

ax.set_title("Stock Price Prediction")
ax.set_ylabel("USD")

# One major tick every 4 days, labelled like "Jan 03 2017".
ax.xaxis.set_major_locator(mdates.DayLocator(interval=4))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d %Y'))

# Grid lines on the major ticks of both axes.
for axis in (ax.xaxis, ax.yaxis):
    axis.grid(True, which="major")

# Slant the date labels and leave room for them below the axes.
plt.xticks(rotation=45)
fig.subplots_adjust(bottom=.2)
plt.show()

------------------------------
#logs
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
lstm (LSTM)                  (None, 50, 128)           66560
_________________________________________________________________
dropout (Dropout)            (None, 50, 128)           0
_________________________________________________________________
lstm_1 (LSTM)                (None, 50, 128)           131584
_________________________________________________________________
dropout_1 (Dropout)          (None, 50, 128)           0
_________________________________________________________________
lstm_2 (LSTM)                (None, 50, 128)           131584
_________________________________________________________________
dropout_2 (Dropout)          (None, 50, 128)           0
_________________________________________________________________
lstm_3 (LSTM)                (None, 128)               131584
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0
_________________________________________________________________
dense (Dense)                (None, 1)                 129
=================================================================
Total params: 461,441
Trainable params: 461,441
Non-trainable params: 0

Epoch 1/100
38/38 [==============================] - 5s 14ms/step - loss: 0.1101
Epoch 2/100
38/38 [==============================] - 1s 14ms/step - loss: 0.0039
Epoch 3/100
38/38 [==============================] - 1s 13ms/step - loss: 0.0030
...
Epoch 98/100
38/38 [==============================] - 0s 13ms/step - loss: 8.8894e-04
Epoch 99/100
38/38 [==============================] - 0s 13ms/step - loss: 8.8318e-04
Epoch 100/100
38/38 [==============================] - 0s 13ms/step - loss: 8.7235e-04

No comments:

Post a Comment