merge all csv files into one that has date and each company's daily closing price
about 500 companies record, skip a few unfounded csv file
save merged csv in joined_closes
import datetime as dt
import pandas as pd
import pandas_datareader.data as web
import bs4 as bs
import pickle
import requests
import os
def compile_data():
with open('sp500tickers.pickle', 'rb') as f:
tickers = pickle.load(f)
main_df = pd.DataFrame()
for count, ticker in enumerate(tickers):
try:
ticker = ticker.strip('\n').split('.')[0]
df = pd.read_csv('stock_dfs/{}.csv'.format(ticker))
df.set_index('Date', inplace=True)
#only interested in closing price, drop the rest, change column header to company name
df.rename(columns = {'Adj Close': ticker}, inplace=True)
df.drop(['Open', 'High', 'Low', 'Close', 'Volume'], 1, inplace=True)
if main_df.empty:
main_df = df
else:
#concat columns from different csv
main_df = main_df.join(df, how='outer')
if count % 10 == 0:
print(count)
except:
print(ticker, ' not found')
print(main_df.tail())
main_df.to_csv('sp500_joined_closes.csv')
compile_data()
reference:
https://www.youtube.com/watch?v=j0zW_KXyQJ4&list=PLQVvvaa0QuDcOdF96TBtRtuQksErCEBYZ&index=7
No comments:
Post a Comment