def portfolio_timestamp_period_with_most_highly_corr_assets(self, df):
    """Select the columns of ``df`` for the assets ranked highest against ``y``.

    The ranking is read from ``self.correlation_coeffecients`` with the
    ``'y'`` entry itself excluded. The first
    ``self.number_of_assets_in_portfolio`` labels, ordered by descending
    coefficient, are used to pick columns out of ``df``.

    Background -- a first approximation to model portfolio returns:
      i)   Find assets that correlate with y, where the correlation is
           higher than a threshold value.
      ii)  Include only those assets and find the maximum timestamp period
           with the most assets.
      iii) Transform the target value y to be the cumulative mean of y in
           order to obtain monotonic behaviour.
      iv)  Train a model to predict the transformed target value with the
           selected most correlated assets in the selected timestamp
           interval.
      v)   Run the model on test data and apply the inverse transform to
           get the target value y.
    Only the asset-selection part is performed by this function; no
    threshold, timestamp filtering or target transform happens here.
    From the plot it looks like a lot of assets are bought and sold at the
    first and last timestamp. Assets should of course primarily be selected
    based on how much they are correlated with y.

    Args:
        df: Table whose column labels include the selected asset names
            (presumably a pandas DataFrame -- confirm against the caller).

    Returns:
        ``df.loc[:, names]`` where ``names`` are the selected asset labels,
        in descending order of their coefficient.
    """
    # Presumably a pandas Series indexed by asset name (sort_values is
    # called without ``by``) -- confirm where the attribute is assigned.
    coefficients = self.correlation_coeffecients

    # Exclude the target's own entry so it can never be picked as an asset.
    asset_coefficients = coefficients.loc[coefficients.index != 'y']

    # NOTE(review): ranking uses the signed value, so strongly negative
    # coefficients end up last -- confirm this is intended rather than
    # ranking by absolute value.
    ranked = asset_coefficients.sort_values(ascending=False)
    names_of_assets = ranked.head(self.number_of_assets_in_portfolio).index

    # TODO: check whether any intermediate sales assets are among the most
    # correlated with y.
    return df.loc[:, names_of_assets]
two_sigma_financial_modelling.py 文件源码
python
阅读 30
收藏 0
点赞 0
评论 0
评论列表
文章目录