Tuesday 5 November 2019

Python finance 6: downloading all S&P 500 historic data

Save each ticker's data in <symbol>.csv.

The data contains Date, High, Low, Open, Close, Volume and Adj Close.

If the download doesn't finish in one run, the script resumes from where it left off (symbols that already have a CSV are skipped).
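
Because each symbol gets its own CSV, progress can be checked mid-run by counting the files already written (a minimal sketch, assuming the stock_dfs folder created by the script below already exists):

import os
print(len([f for f in os.listdir('stock_dfs') if f.endswith('.csv')]), 'symbols downloaded so far')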

import datetime as dt
import pandas_datareader.data as web
import bs4 as bs
import pickle
import requests
import os

#get all S&P 500 symbols from Wikipedia and cache them in a pickle file
def save_sp500_tickers():
    resp = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    tickers = []
    for row in table.findAll('tr')[1:]:
        #the first column of each row is the ticker symbol; strip the trailing newline
        ticker = row.findAll('td')[0].text.strip()
        tickers.append(ticker)

    with open('sp500tickers.pickle', 'wb') as f:
        pickle.dump(tickers, f)

    #print(tickers)
    return tickers

#save_sp500_tickers()
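
A quick sanity check on the scrape is to load the cached list back and look at it (a minimal sketch; it only assumes save_sp500_tickers() has been run once):

import pickle

with open('sp500tickers.pickle', 'rb') as f:
    tickers = pickle.load(f)
print(len(tickers), 'tickers, e.g.', tickers[:5])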

#download historic data from Yahoo for every company in the pickle file
def get_data_from_yahoo(reload_sp500=False):
    if reload_sp500:
        tickers = save_sp500_tickers()
    else:
        with open('sp500tickers.pickle','rb') as f:
            tickers = pickle.load(f)

    if not os.path.exists('stock_dfs'):
        os.makedirs('stock_dfs')

    start = dt.datetime(2011, 1, 1)
    end = dt.datetime(2019, 10, 31)

    for ticker in tickers:
        try:
            #strip any leftover newline and drop a class suffix after a dot (e.g. BRK.B -> BRK)
            ticker = ticker.strip('\n').split('.')[0]
            #skip symbols that already have a CSV, so an interrupted run can resume
            if not os.path.exists('stock_dfs/{}.csv'.format(ticker)):
                print('downloading', ticker)
                df = web.DataReader(ticker, 'yahoo', start, end)
                df.to_csv('stock_dfs/{}.csv'.format(ticker))
            else:
                print('Already have {}'.format(ticker))
        except Exception as e:
            print(ticker, 'not found:', e)

get_data_from_yahoo()
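
To verify that a file came down with the columns described above, one of the CSVs can be read back with pandas (a minimal sketch; MMM is just an example symbol and assumes stock_dfs/MMM.csv was downloaded successfully):

import pandas as pd

df = pd.read_csv('stock_dfs/MMM.csv', parse_dates=True, index_col='Date')
print(df[['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close']].tail())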

references:
https://www.youtube.com/watch?v=baCAFPHb1o4&list=PLQVvvaa0QuDcOdF96TBtRtuQksErCEBYZ&index=6
https://github.com/pydata/pandas-datareader/issues/614
