def Orthogonalize(left_data, right_data, left_name, right_name):
    """Orthogonalize one factor against other factors, group by group.

    The rows are grouped by index level 0.  Within each group the
    ``left_name`` column is regressed by ordinary least squares on the
    ``right_name`` column(s) plus an intercept, and the regression residuals
    are returned as the orthogonalized factor.

    The original docstring describes the data layout as
    ``[date, IDs, factor1, factor2...]``; presumably index level 0 is the
    date, so each regression is one cross-section -- confirm with callers.

    Parameters
    ----------
    left_data : pandas.DataFrame
        Frame containing the column ``left_name``.
    right_data : pandas.DataFrame
        Frame containing the column(s) ``right_name``.  It is aligned with
        ``left_data`` on the index.
    left_name : str
        Name of the factor to orthogonalize (the dependent variable).
    right_name : str or list
        Name, or list of names, of the factors to regress on.

    Returns
    -------
    pandas.DataFrame
        A single column named ``left_name + '_orthogonalized'`` holding the
        residuals.  Rows where the dependent variable or any regressor is
        missing are NaN, as are all rows of a group that has no complete
        observation.
    """
    regressors = right_name if isinstance(right_name, list) else [right_name]
    out_name = left_name + '_orthogonalized'

    def _group_residuals(group):
        """Return the OLS residuals of one group as a one-column frame."""
        y = np.asarray(group[left_name], dtype=float)
        x = np.asarray(group[regressors], dtype=float)
        # The intercept is appended to the design matrix directly rather than
        # added as a named column, so it cannot overwrite a regressor that
        # happens to be called 'alpha'.
        design = np.column_stack([x, np.ones(len(group))])
        complete = ~np.isnan(y) & ~np.isnan(design).any(axis=1)

        resid = np.full(len(group), np.nan)
        if complete.any():
            # Only the residuals are needed; the least-squares projection is
            # unique even when the design matrix is rank deficient.
            coef = np.linalg.lstsq(design[complete], y[complete], rcond=None)[0]
            resid[complete] = y[complete] - design[complete].dot(coef)
        return pd.DataFrame({out_name: resid}, index=group.index)

    factor = pd.concat([left_data[[left_name]], right_data[regressors]], axis=1)
    if factor.empty:
        # groupby.apply on an empty frame would not create the output column.
        return pd.DataFrame({out_name: np.full(0, np.nan)}, index=factor.index)

    # group_keys=False keeps the original index; without it, recent pandas
    # versions prepend the group key as an additional index level.
    result = factor.groupby(level=0, group_keys=False).apply(_group_residuals)
    return result[[out_name]]
# (Web-page navigation residue from the source article: "Comment list", "Article table of contents".)