# Return_portfolio.py
import numpy as np
import pandas as pd
import warnings as wn
def make_demo():
prices = get_sample_prices()
rets = prices.pct_change().dropna()
weights = pd.DataFrame([
{'Date': '2006-08-01', 'SPY': .5, 'TLT': .25, 'DBC': .25},
{'Date': '2010-08-02', 'SPY': .5, 'TLT': .25, 'DBC': .25},
{'Date': '2023-04-03', 'SPY': .5, 'TLT': .25, 'DBC': .25},
{'Date': '2023-05-03', 'SPY': .5, 'TLT': .25, 'DBC': .25},
])
weights.set_index('Date', inplace=True)
weights.index = pd.to_datetime(weights.index)
    # the explicit `weights` schedule built above could be passed instead of the list
    return return_portfolio(rets, [.5, .25, .25], verbose=True, rebalance_on='months')
def get_sample_prices():
import yfinance as yf
    # Download adjusted close prices for SPY, TLT, and DBC
symbols = ["SPY", "TLT", "DBC"]
start_date = "1990-01-01"
end_date = pd.Timestamp.today().strftime("%Y-%m-%d")
df_list = []
for symbol in symbols:
        # recent yfinance versions default auto_adjust=True, which drops the "Adj Close"
        # column; set it explicitly so the selection below keeps working
        data = yf.download(symbol, start=start_date, end=end_date, auto_adjust=False)
        data = data["Adj Close"]
data.name = symbol
df_list.append(data)
# Combine the data frames into one
return pd.concat(df_list, axis=1).dropna()
def endpoints(df, on = "M", offset = 0):
"""
Returns index of endpoints of a time series analogous to R's endpoints
function.
Takes in:
df -- a dataframe/series with a date index
on -- a string specifying frequency of endpoints
(E.G. "M" for months, "Q" for quarters, and so on)
offset -- to offset by a specified index on the original data
(E.G. if the data is daily resolution, offset of 1 offsets by a day)
This is to allow for timing luck analysis. Thank Corey Hoffstein.
"""
# to allow for familiarity with R
# "months" becomes "M" for resampling
if len(on) > 3:
on = on[0].capitalize()
# get index dates of formal endpoints
ep_dates = pd.Series(df.index, index = df.index).resample(on).max()
# get the integer indices of dates that are the endpoints
date_idx = np.where(df.index.isin(ep_dates))
# append last day
date_idx = np.append(date_idx, df.shape[0]-1)
if offset != 0:
date_idx = date_idx + offset
date_idx[date_idx < 0] = 0
date_idx[date_idx > df.shape[0]-1] = df.shape[0]-1
return np.unique(date_idx)
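# Illustrative use of endpoints() (the variable name daily_rets is hypothetical):
#   month_end_locs = endpoints(daily_rets, on="months")  # integer row positions of month-ends
#   rebalance_dates = daily_rets.index[month_end_locs]   # the corresponding timestamps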
def compute_drifting_weights(w):
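    # normalize each row so the drifted weights sum to 1 across assets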
return w / np.sum(w, axis=1)[:, np.newaxis]
def compute_weights_and_returns(subset_returns, weights):
subset_returns = subset_returns.copy()
# assuming trading on close, return on rebalance day is 0 for new position
subset_returns[0] = 0
rep_weights = np.tile(weights, (subset_returns.shape[0], 1))
    # weights drifted by cumulative returns within the rebalance period
cum_subset_weights = np.multiply(
np.cumprod(1 + subset_returns, axis=0), rep_weights
)
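    # beginning-of-period weights: divide out the current day's return from the drifted weights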
cum_subset_weights_bop = np.divide(cum_subset_weights, 1 + subset_returns)
EOP_Weight = compute_drifting_weights(cum_subset_weights)
BOP_Weight = compute_drifting_weights(cum_subset_weights_bop)
portf_returns_subset = np.sum(np.multiply(subset_returns, BOP_Weight), axis=1)
return [portf_returns_subset, BOP_Weight, EOP_Weight]
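# Worked example for compute_weights_and_returns (illustrative numbers only): starting a
# period at 50/50 weights, if the next day's asset returns are [0.10, 0.00], the drifted
# end-of-period weights are [0.55, 0.50] / 1.05 = [0.524, 0.476], the beginning-of-period
# weights remain [0.50, 0.50], and that day's portfolio return is 0.5*0.10 + 0.5*0.00 = 0.05.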
def compute_turnover(w):
    # add the first rebalance, since we start from a non-invested portfolio;
    # this is not exact if cash is a valid position, because the starting point
    # should then be 100% invested in cash rather than uninvested
w = pd.concat([w, pd.DataFrame(0, index=[w.index[0]], columns=w.columns)])
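    # each rebalance date has two rows: the drifted (pre-trade) weights and the new target
    # (post-trade) weights; the first date is paired with the appended all-zero row instead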
turnover = w.groupby(w.index).diff().abs().dropna(how='all').sum(axis=1)
turnover.name = 'Turnover'
return turnover
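# Worked example for compute_turnover (illustrative): rebalancing from drifted weights of
# [0.60, 0.40] back to targets of [0.50, 0.50] gives a two-way turnover of
# |0.60 - 0.50| + |0.40 - 0.50| = 0.20 for that date.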
def prepare_weights(R, weights=None, rebalance_on=None):
# if no weights provided, create equal weight vector
if weights is None:
weights = [1./R.shape[1]] * R.shape[1]
wn.warn("Weights not provided, assuming equal weight for rebalancing periods.")
else:
weights = weights.copy()
# if weights aren't passed in as a data frame (they're probably a list)
# turn them into a 1 x num_assets data frame
if isinstance(weights, list):
weights = pd.Series(weights, index=R.columns).to_frame().T
elif isinstance(weights, pd.DataFrame):
pass
else:
raise TypeError("Only list or pd.DataFrame are accepted")
# error checking for same number of weights as assets
if weights.shape[1] != R.shape[1]:
raise ValueError("Number of weights is unequal to number of assets. Correct this.")
# if there's a row vector of weights, create a data frame with the desired
# rebalancing schedule -- also add in the very first date into the schedule
if weights.shape[0] == 1 and weights.index[0] == 0:
if rebalance_on is not None:
ep = endpoints(R, on=rebalance_on)
weights = pd.DataFrame(
np.tile(weights, (len(ep), 1)),
index=R.index[ep],
columns=R.columns
)
# add first date if not already there
if R.index[0] != weights.index[0]:
first_weights = pd.DataFrame(
[weights.iloc[0,:].values],
index=[R.index[0]],
columns=R.columns
)
weights = pd.concat([first_weights, weights], axis=0)
else:
            # in that case we assume, as in the R implementation, that one invests
            # one day before so that we don't lose one trading date
R = pd.concat([
pd.DataFrame(0, columns=R.columns,
index=[R.index[0]-pd.Timedelta(1, "d")]),
R
])
weights.index = [R.index[0]]
weights.index.name = R.index.name
if weights.isna().sum().sum() > 0:
weights.fillna(0, inplace=True)
wn.warn("NAs detected in weights. Imputing with zeroes.")
residual_weights = 1 - weights.sum(axis=1)
    if abs(residual_weights).sum() > 1e-12:
        wn.warn("One or more periods do not have investment equal to 1. Creating residual weights.")
weights["Residual"] = residual_weights
R["Residual"] = 0
return weights, R
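# Illustrative inputs accepted by prepare_weights() (variable names are hypothetical):
#   prepare_weights(rets, [0.5, 0.25, 0.25], rebalance_on='months')  # single target row, expanded to month-ends
#   prepare_weights(rets, weight_schedule_df)                        # explicit date-indexed rebalance schedule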
def compute_risk_contribution(rets, weights):
"""
    Returns the percentage risk contribution of each asset over the period.
source: https://quantdare.com/risk-contribution-in-portfolio-management/
Parameters
----------
rets : np.ndarray
TxN array of returns
weights : np.array
array of weights
Returns
-------
np.ndarray
percentage risk contribution
"""
covariances = np.cov(rets[1:].T)
portfolio_vol = np.sqrt(np.dot(np.dot(weights, covariances), weights))
marginal_risk_contr = np.dot(weights, covariances) / portfolio_vol
risk_contribution = weights * marginal_risk_contr
return risk_contribution / portfolio_vol
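# In formula terms, as implemented above: with covariance matrix S and weights w, portfolio
# volatility is sigma_p = sqrt(w' S w), the marginal risk contribution is (S w) / sigma_p, and
# each asset's percentage risk contribution is w_i * (S w)_i / sigma_p**2, which sums to 1.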
def return_portfolio(R, weights=None, verbose=True, rebalance_on=None):
"""
Parameters
----------
R : a pandas series of asset returns
weights : a vector or pandas series of asset weights.
verbose : a boolean specifying a verbose output containing:
portfolio returns,
beginning of period weights and values, end of period weights and values,
asset contribution to returns, asset contribution to risk,
and two-way turnover calculation
rebalance_on : a string specifying rebalancing frequency if weights are passed in as a vector.
e.g. 'months'
Raises
------
ValueError
Number of asset weights must be equal to the number of assets.
Returns
-------
TYPE
See verbose parameter for True value, otherwise just portfolio returns.
"""
R = R.copy()
# impute NAs in returns
if R.isna().sum().sum() > 0:
R.fillna(0, inplace=True)
wn.warn("NAs detected in returns. Imputing with zeroes.")
weights, R = prepare_weights(R, weights=weights, rebalance_on=rebalance_on)
Wv = weights.values
date_idx = np.where(R.index.isin(weights.index.tolist()))[0]
Rv = R.values
portf_returns, bop_weights, eop_weights = [], [], []
for i in range(date_idx.shape[0]):
        try:
            subset = Rv[date_idx[i]:date_idx[i+1]+1]
        except IndexError:
            # last rebalance period: take everything through the end of the data
            subset = Rv[date_idx[i]:]
subset_out = compute_weights_and_returns(subset, Wv[i])
if i > 0:
            # drop the first row of returns and BOP weights, which duplicates the previous
            # subset's last row, but keep it in the EOP weights for the turnover computation
subset_out = [s[1:] for s in subset_out[:2]] + [subset_out[2]]
portf_returns.append(subset_out[0])
bop_weights.append(subset_out[1])
eop_weights.append(subset_out[2])
portf_returns = np.concatenate(portf_returns)
bop_weights = np.concatenate(bop_weights)
eop_weights = np.concatenate(eop_weights)
#risk_contribution = np.concatenate(risk_contribution, axis = 0)
if not verbose:
return pd.DataFrame(pd.Series(portf_returns, index=R.loc[weights.index[0]:].index,
name='portfolio.return')).iloc[1:,]
    # with numpy arrays we lose the date index, so rebalance rows are tracked by integer position
    increment = np.array([0]+list(range(weights.shape[0]-1)))
    date_idx_eop = date_idx + increment # duplicate position, first occurrence
    duplicate_positions = date_idx_eop + np.array([0] + [1] * (weights.shape[0]-1)) # duplicate position, second occurrence
pos = np.unique(np.concatenate((date_idx_eop, duplicate_positions)))
pos = pos - date_idx[0] # because we truncate data before first rebalance
weights_at_rebalance = eop_weights[pos]
# reconstruct pd.DataFrame to use Groupby on dates
weights_at_rebalance = pd.DataFrame(
weights_at_rebalance, columns=weights.columns,
index=[
weights.index[0]
] + list(np.sort(weights.index[1:].to_list()*2))
)
turnover = compute_turnover(weights_at_rebalance)
    # dropping the duplicate indices created by the loop when there are multiple rebalances;
    # they were kept above because they are needed to compute turnover
eop_weights = np.delete(eop_weights, (date_idx_eop - date_idx[0])[1:], axis=0)
pct_contribution = np.multiply(Rv[date_idx[0]:], bop_weights)
cum_returns = (1 + portf_returns).cumprod()
eop_value = np.multiply(
eop_weights,
np.tile(cum_returns, (eop_weights.shape[1], 1)).T
)
bop_value = np.multiply(
bop_weights,
np.tile(cum_returns/(1+portf_returns), (bop_weights.shape[1], 1)).T
)
# recreate pd.DataFrames
date_index = R.loc[weights.index[0]:].index
cols = weights.columns
portf_returns = pd.DataFrame(pd.Series(portf_returns, index=date_index,
name='portfolio.return'))
pct_contribution = pd.DataFrame(pct_contribution, index=date_index, columns=cols)
bop_weights = pd.DataFrame(bop_weights, index=date_index, columns=cols)
eop_weights = pd.DataFrame(eop_weights, index=date_index, columns=cols)
bop_value = pd.DataFrame(bop_value, index=date_index, columns=cols)
eop_value = pd.DataFrame(eop_value, index=date_index, columns=cols)
    # remove the first day of zeroes from the return series, and the trailing turnover
    # entry of 1 that corresponds to the initial investment
portf_returns = portf_returns.iloc[1:,]
bop_weights = bop_weights.iloc[1:,]
eop_weights = eop_weights.iloc[1:,]
pct_contribution = pct_contribution.iloc[1:,]
bop_value = bop_value.iloc[1:,]
eop_value = eop_value.iloc[1:,]
turnover = turnover.iloc[0:(len(turnover)-1),]
#risk_contribution = pd.DataFrame(risk_contribution, index=weights.index, columns=cols)
    out = {
        'returns': portf_returns,
        'contribution': pct_contribution,
        'BOP.Weight': bop_weights,
        'EOP.Weight': eop_weights,
        'BOP.Value': bop_value,
        'EOP.Value': eop_value,
        'Two.Way.Turnover': turnover
    }
    return out
if __name__ == '__main__':
res = make_demo()
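    # Illustrative inspection of the verbose output dict (keys come from return_portfolio above)
    print(res['returns'].head())
    print("Cumulative portfolio return:",
          float((1 + res['returns']['portfolio.return']).prod() - 1))
    print("Average two-way turnover per rebalance:",
          float(res['Two.Way.Turnover'].mean()))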