Date Open High Low Close Volume
0 1/3/2012 325.25 332.83 324.97 663.59 7,380,500
1 1/4/2012 331.27 333.87 329.08 666.45 5,749,400
2 1/5/2012 329.83 330.75 326.89 657.21 6,590,300
3 1/6/2012 328.34 328.77 323.68 648.24 5,405,900
4 1/9/2012 322.04 322.29 309.46 620.76 11,688,800
... ... ... ... ... ... ...
1253 12/23/2016 790.90 792.74 787.28 789.91 623,400
1254 12/27/2016 790.68 797.86 787.66 791.55 789,100
1255 12/28/2016 793.70 794.23 783.20 785.05 1,153,800
1256 12/29/2016 783.33 785.93 778.92 782.79 744,300
1257 12/30/2016 782.75 782.78 770.41 771.82 1,770,000
#Testing data: Jan 3, 2017 to Jan 31, 2017
Date Open High Low Close Volume
0 1/3/2017 778.81 789.63 775.80 786.14 1,657,300
1 1/4/2017 788.36 791.34 783.16 786.90 1,073,000
2 1/5/2017 786.08 794.48 785.02 794.02 1,335,200
3 1/6/2017 795.26 807.90 792.20 806.15 1,640,200
...
16 1/26/2017 837.81 838.00 827.01 832.15 2,973,900
17 1/27/2017 834.71 841.95 820.44 823.31 2,965,800
18 1/30/2017 814.66 815.84 799.80 802.32 3,246,600
19 1/31/2017 796.86 801.25 790.52 796.79 2,160,600
#The goal is to build a model from 5 years of stock prices that forecasts the next month's stock trend
predicted trend follows the real one
#stock.py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.models import load_model
import matplotlib.dates as mdates
# Load the training set; only the "Open" price column feeds the model.
csv = pd.read_csv("stock_train.csv")
data = csv.loc[:, "Open"].values

# Squash the prices into [0, 1]. The fitted scaler stays in `scaler`
# because later parts of the script use it again.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data.reshape(-1, 1))

# Sliding windows: every sample holds 50 consecutive scaled prices and its
# target is the scaled price of the following (51st) day, so y lags x by
# 50 samples.
window = 50
n_rows = scaled_data.shape[0]
x = np.array([scaled_data[i - window:i, 0] for i in range(window, n_rows)])
y = np.array([scaled_data[i, 0] for i in range(window, n_rows)])

# The LSTM expects (samples, timesteps, features); add a trailing feature axis.
x = x.reshape(x.shape[0], x.shape[1], 1)
# Shapes noted by the author for this dataset: x -> (1208, 50, 1), y -> (1208,)
# Train on GPU when one is available. Memory growth makes TensorFlow allocate
# GPU memory as needed instead of reserving it all up front.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
#print("Num GPUs Available: ", len(physical_devices))
# Iterate instead of indexing [0]: on a machine without a GPU the list is
# empty and physical_devices[0] would raise IndexError, aborting the script.
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

# Training code, disabled by wrapping it in a string literal. Running it
# builds a 4-layer stacked LSTM on (x, y) and saves it as "stock.h5", the
# file the prediction step loads. Remove the triple quotes to retrain.
"""
model = Sequential()
model.add(LSTM(128, input_shape=(50, 1), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(128))
model.add(Dropout(0.1))
model.add(Dense(1))
model.summary()
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
)
model.fit(x,
          y,
          epochs=100,
          batch_size=32)
model.save("stock.h5")
"""
# Append the test set to the training set so the first test-day prediction
# has a full window of preceding prices to look back on.
csv2 = pd.read_csv("stock_test.csv")
print(csv2)
csv3 = pd.concat((csv, csv2), axis=0)

# The model predicts day N+1 from the previous `window` days, so take the
# last `window` training rows followed by every test row.
window = x.shape[1]  # window length of the training samples (50)
data = csv3.loc[:, "Open"].iloc[len(csv) - window:].values

# Reuse the scaler fitted on the training prices. Refitting it here
# (fit_transform) would map the test prices onto a different [0, 1] range than
# the one the model was trained on and would also corrupt inverse_transform.
# Test prices above the training maximum simply scale to values > 1.
scaled_data = scaler.transform(data.reshape(-1, 1))

# One input window per test day.
x_test = np.array(
    [scaled_data[i - window:i, 0] for i in range(window, scaled_data.shape[0])]
)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
print(x_test.shape)
#(20, 50, 1)

model = load_model('stock.h5')
predicted_stock_price = model.predict(x_test)

# Model output is in scaled units; convert it back to real prices.
predicted_stock_price = scaler.inverse_transform(predicted_stock_price)
# Plot the real opening prices against the model's predictions for the
# test period, with the dates on the x axis.
fig = plt.figure(figsize=(7, 5), dpi=100)
ax = fig.add_subplot(111)

t = pd.to_datetime(csv2.loc[:, "Date"].values)
real_stock_price = csv2.loc[:, "Open"].values

ax.plot(t, real_stock_price, label="Real stock price", color="red")
ax.plot(t, predicted_stock_price, label="Predicted Stock Price", color="blue")
ax.legend()

# One labelled tick every 4 days, formatted like "Jan 03 2017".
ax.xaxis.set_major_locator(mdates.DayLocator(interval=4))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d %Y'))

ax.set_title("Stock Price Prediction")
ax.set_ylabel("USD")
ax.grid(True, which="major", axis="both")

# Rotate the date labels and leave room below the axes so they are not cut off.
plt.xticks(rotation=45)
fig.subplots_adjust(bottom=.2)
plt.show()
------------------------------
#logs
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 50, 128) 66560
_________________________________________________________________
dropout (Dropout) (None, 50, 128) 0
_________________________________________________________________
lstm_1 (LSTM) (None, 50, 128) 131584
_________________________________________________________________
dropout_1 (Dropout) (None, 50, 128) 0
_________________________________________________________________
lstm_2 (LSTM) (None, 50, 128) 131584
_________________________________________________________________
dropout_2 (Dropout) (None, 50, 128) 0
_________________________________________________________________
lstm_3 (LSTM) (None, 128) 131584
_________________________________________________________________
dropout_3 (Dropout) (None, 128) 0
_________________________________________________________________
dense (Dense) (None, 1) 129
=================================================================
Total params: 461,441
Trainable params: 461,441
Non-trainable params: 0
Epoch 1/100
38/38 [==============================] - 5s 14ms/step - loss: 0.1101
Epoch 2/100
38/38 [==============================] - 1s 14ms/step - loss: 0.0039
Epoch 3/100
38/38 [==============================] - 1s 13ms/step - loss: 0.0030
...
Epoch 98/100
38/38 [==============================] - 0s 13ms/step - loss: 8.8894e-04
Epoch 99/100
38/38 [==============================] - 0s 13ms/step - loss: 8.8318e-04
Epoch 100/100
38/38 [==============================] - 0s 13ms/step - loss: 8.7235e-04
reference:
dataset
plt plot
'numpy.ndarray' object has no attribute 'strftime'
No comments:
Post a Comment