Thursday 21 November 2019

quantopian lecture pandas 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

dict_data = {
    'a' : [1, 2, 3, 4, 5],
    'b' : ['L', 'K', 'J', 'M', 'Z'],
    'c' : np.random.normal(0, 1, 5)
}

frame_data = pd.DataFrame(dict_data, index=pd.date_range('2016-01-01', periods=5))
print frame_data

            a  b         c
2016-01-01  1  L  1.244900
2016-01-02  2  K -0.342299
2016-01-03  3  J  0.622280
2016-01-04  4  M  0.568443
2016-01-05  5  Z -1.024938

s_1 = pd.Series([2, 4, 6, 8, 10], name='Evens')
s_2 = pd.Series([1, 3, 5, 7, 9], name="Odds")
numbers = pd.concat([s_1, s_2], axis=1)
print numbers
numbers.columns = ['Shmevens', 'Shmodds']
print numbers
numbers.index = pd.date_range("2016-01-01", periods=len(numbers))
print numbers
numbers.values

   Evens  Odds
0      2     1
1      4     3
2      6     5
3      8     7
4     10     9
   Shmevens  Shmodds
0         2        1
1         4        3
2         6        5
3         8        7
4        10        9
            Shmevens  Shmodds
2016-01-01         2        1
2016-01-02         4        3
2016-01-03         6        5
2016-01-04         8        7
2016-01-05        10        9
array([[ 2,  1],
       [ 4,  3],
       [ 6,  5],
       [ 8,  7],
       [10,  9]])

symbol = ["CMG", "MCD", "SHAK", "WFM"]
start = "2012-01-01"
end = "2016-01-01"
prices = get_pricing(symbol, start_date=start, end_date=end, fields="price")
prices.columns = map(lambda x: x.symbol, prices.columns)

prices.loc[:, 'CMG'].head()
2012-01-03 00:00:00+00:00    340.980
2012-01-04 00:00:00+00:00    348.740
2012-01-05 00:00:00+00:00    349.990
2012-01-06 00:00:00+00:00    348.950
2012-01-09 00:00:00+00:00    339.522
Freq: C, Name: CMG, dtype: float64

prices.loc[:, ['CMG', 'MCD']].head()
CMG MCD
2012-01-03 00:00:00+00:00 340.980 86.631
2012-01-04 00:00:00+00:00 348.740 87.166
2012-01-05 00:00:00+00:00 349.990 87.526
2012-01-06 00:00:00+00:00 348.950 88.192
2012-01-09 00:00:00+00:00 339.522 87.342

prices.loc['2015-12-15':'2015-12-22']
CMG MCD SHAK WFM
2015-12-15 00:00:00+00:00 555.64 116.96 41.510 32.96
2015-12-16 00:00:00+00:00 568.50 117.85 40.140 33.65
2015-12-17 00:00:00+00:00 554.91 117.54 38.500 33.38
2015-12-18 00:00:00+00:00 541.08 116.58 39.380 32.72
2015-12-21 00:00:00+00:00 521.71 117.70 38.205 32.98
2015-12-22 00:00:00+00:00 495.41 117.71 39.760 34.79

prices.loc['2015-12-15':'2015-12-22', ['CMG', 'MCD']]
CMG MCD
2015-12-15 00:00:00+00:00 555.64 116.96
2015-12-16 00:00:00+00:00 568.50 117.85
2015-12-17 00:00:00+00:00 554.91 117.54
2015-12-18 00:00:00+00:00 541.08 116.58
2015-12-21 00:00:00+00:00 521.71 117.70
2015-12-22 00:00:00+00:00 495.41 117.71

# Access prices with integer index in
# [1, 3, 5, 7, 9, 11, 13, ..., 99]
# and in column 0 or 3
prices.iloc[range(1, 100, 2), [0, 3]].head(20)
CMG WFM
2012-01-04 00:00:00+00:00 348.74 33.650
2012-01-06 00:00:00+00:00 348.95 34.319
2012-01-10 00:00:00+00:00 340.70 34.224
2012-01-12 00:00:00+00:00 347.83 33.913
2012-01-17 00:00:00+00:00 353.61 36.230
2012-01-19 00:00:00+00:00 358.10 36.489
2012-01-23 00:00:00+00:00 360.53 35.918
2012-01-25 00:00:00+00:00 363.28 36.404
2012-01-27 00:00:00+00:00 366.80 35.338
2012-01-31 00:00:00+00:00 367.58 34.932
2012-02-02 00:00:00+00:00 362.64 35.673
2012-02-06 00:00:00+00:00 371.65 36.055
2012-02-08 00:00:00+00:00 373.81 36.768
2012-02-10 00:00:00+00:00 376.39 38.509
2012-02-14 00:00:00+00:00 379.14 38.216
2012-02-16 00:00:00+00:00 381.91 38.032
2012-02-21 00:00:00+00:00 383.86 38.018
2012-02-23 00:00:00+00:00 386.82 38.240
2012-02-27 00:00:00+00:00 389.11 38.528
2012-02-29 00:00:00+00:00 390.47 38.098

#boolean index
prices.loc[prices.MCD > prices.WFM].head()
CMG MCD SHAK WFM
2012-01-03 00:00:00+00:00 340.980 86.631 NaN 32.788
2012-01-04 00:00:00+00:00 348.740 87.166 NaN 33.650
2012-01-05 00:00:00+00:00 349.990 87.526 NaN 34.257
2012-01-06 00:00:00+00:00 348.950 88.192 NaN 34.319
2012-01-09 00:00:00+00:00 339.522 87.342 NaN 34.323

prices.loc[(prices.MCD > prices.WFM) & ~prices.SHAK.isnull()].head()
CMG MCD SHAK WFM
2015-01-30 00:00:00+00:00 709.58 89.331 45.76 51.583
2015-02-02 00:00:00+00:00 712.69 89.418 43.50 52.623
2015-02-03 00:00:00+00:00 726.07 90.791 44.87 52.880
2015-02-04 00:00:00+00:00 675.99 90.887 41.32 53.138
2015-02-05 00:00:00+00:00 670.57 91.177 42.46 52.851

#add column
s_1 = get_pricing('TSLA', start_date=start, end_date=end, fields='price')
prices.loc[:, 'TSLA'] = s_1
prices.head(5)
CMG MCD SHAK WFM TSLA
2012-01-03 00:00:00+00:00 340.980 86.631 NaN 32.788 28.06
2012-01-04 00:00:00+00:00 348.740 87.166 NaN 33.650 27.71
2012-01-05 00:00:00+00:00 349.990 87.526 NaN 34.257 27.12
2012-01-06 00:00:00+00:00 348.950 88.192 NaN 34.319 26.94
2012-01-09 00:00:00+00:00 339.522 87.342 NaN 34.323 27.21

#delete column
prices = prices.drop('TSLA', axis=1)
prices.head(5)
CMG MCD SHAK WFM
2012-01-03 00:00:00+00:00 340.980 86.631 NaN 32.788
2012-01-04 00:00:00+00:00 348.740 87.166 NaN 33.650
2012-01-05 00:00:00+00:00 349.990 87.526 NaN 34.257
2012-01-06 00:00:00+00:00 348.950 88.192 NaN 34.319
2012-01-09 00:00:00+00:00 339.522 87.342 NaN 34.323

#combine column
df_1 = get_pricing(['SPY', 'VXX'], start_date=start, end_date=end, fields='price')
df_2 = get_pricing(['MSFT', 'AAPL', 'GOOG'], start_date=start, end_date=end, fields='price')
df_3 = pd.concat([df_1, df_2], axis=1)
df_3.head()
Equity(8554 [SPY]) Equity(51653 [VXX]) Equity(5061 [MSFT]) Equity(24 [AAPL]) Equity(46631 [GOOG])
2012-01-03 00:00:00+00:00 118.414 NaN 23.997 54.684 NaN
2012-01-04 00:00:00+00:00 118.498 NaN 24.498 54.995 NaN
2012-01-05 00:00:00+00:00 118.850 NaN 24.749 55.597 NaN
2012-01-06 00:00:00+00:00 118.600 NaN 25.151 56.194 NaN
2012-01-09 00:00:00+00:00 118.795 NaN 24.811 56.098 NaN

No comments:

Post a Comment