Tuesday, 5 November 2019

python finance 6 - downloading all S&P 500 historical data

saves each symbol's data in its own <symbol>.csv file (e.g. AAPL.csv)

the data contains Date, High, Low, Open, Close, Volume and Adj Close columns

if the download does not finish in one run, re-running the script resumes where it left off, skipping symbols that already have a CSV; two short usage sketches follow the code below

import datetime as dt
import pandas_datareader.data as web
import bs4 as bs
import pickle
import requests
import os

#get all s&p500 symbols, save in pickle
def save_sp500_tickers():
    resp = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    tickers = []
    for row in table.find_all('tr')[1:]:
        ticker = row.find_all('td')[0].text.strip()  # first cell holds the symbol; strip the trailing newline
        tickers.append(ticker)

    with open('sp500tickers.pickle', 'wb') as f:
        pickle.dump(tickers, f)

    #print(tickers)
    return tickers

#save_sp500_tickers()

#get historic data from yahoo for all companies in pickle file
def get_data_from_yahoo(reload_sp500=False):
    if reload_sp500:
        tickers = save_sp500_tickers()
    else:
        with open('sp500tickers.pickle','rb') as f:
            tickers = pickle.load(f)

    if not os.path.exists('stock_dfs'):
        os.makedirs('stock_dfs')

    start = dt.datetime(2011, 1, 1)
    end = dt.datetime(2019, 10, 31)

    for ticker in tickers:
        try:
            ticker = ticker.strip().replace('.', '-')  # Yahoo uses '-' instead of '.' for class shares, e.g. BRK.B -> BRK-B
            # only download symbols that do not already have a CSV, so a re-run resumes where it left off
            if not os.path.exists('stock_dfs/{}.csv'.format(ticker)):
                print('downloading', ticker)
                df = web.DataReader(ticker, 'yahoo', start, end)
                df.to_csv('stock_dfs/{}.csv'.format(ticker))
            else:
                print('Already have {}'.format(ticker))
        except Exception as e:
            # report the symbol and the error, then carry on with the rest of the list
            print(ticker, 'failed:', e)

get_data_from_yahoo()
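
a minimal sketch for reading one of the saved files back into pandas to confirm the columns listed above; it assumes AAPL.csv has already been downloaded (any other symbol in stock_dfs works the same way):

import pandas as pd

# Date is written as the first CSV column, so use it as the index
df = pd.read_csv('stock_dfs/AAPL.csv', parse_dates=True, index_col='Date')
print(df.columns.tolist())   # should show High, Low, Open, Close, Volume, Adj Close
print(df['Adj Close'].tail())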
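
because already-downloaded symbols are skipped, the script can be stopped and re-run freely; a small sketch to check how far the download has got, assuming the pickle file and the stock_dfs folder already exist:

import os
import pickle

# total number of tickers scraped from Wikipedia
with open('sp500tickers.pickle', 'rb') as f:
    total = len(pickle.load(f))

# number of per-symbol CSVs downloaded so far
done = len([name for name in os.listdir('stock_dfs') if name.endswith('.csv')])
print('{} of {} tickers downloaded'.format(done, total))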

reference:
https://www.youtube.com/watch?v=baCAFPHb1o4&list=PLQVvvaa0QuDcOdF96TBtRtuQksErCEBYZ&index=6
https://github.com/pydata/pandas-datareader/issues/614
