Tuesday, 5 November 2019

python finance 7 merge csv

merge all csv files into one that has date and each company's daily closing price

about 500 companies record, skip a few unfounded csv file

save merged csv in joined_closes

import datetime as dt
import pandas as pd
import pandas_datareader.data as web
import bs4 as bs
import pickle
import requests
import os

def compile_data():
    with open('sp500tickers.pickle', 'rb') as f:
        tickers = pickle.load(f)

    main_df = pd.DataFrame()

    for count, ticker in enumerate(tickers):
        try:
            ticker = ticker.strip('\n').split('.')[0]

            df = pd.read_csv('stock_dfs/{}.csv'.format(ticker))
            df.set_index('Date', inplace=True)

            #only interested in closing price, drop the rest, change column header to company name
            df.rename(columns = {'Adj Close': ticker}, inplace=True)
            df.drop(['Open', 'High', 'Low', 'Close', 'Volume'], 1, inplace=True)

            if main_df.empty:
                main_df = df
            else:
                #concat columns from different csv
                main_df = main_df.join(df, how='outer')

            if count % 10 == 0:
                print(count)
        except:
            print(ticker, ' not found')

    print(main_df.tail())
    main_df.to_csv('sp500_joined_closes.csv')

compile_data()

reference:
https://www.youtube.com/watch?v=j0zW_KXyQJ4&list=PLQVvvaa0QuDcOdF96TBtRtuQksErCEBYZ&index=7

No comments:

Post a Comment