def _filter_data(lhs, rhs, weights=None):
    """
    Align the inputs of a single OLS and drop every incomplete observation.

    The explanatory variables are combined into one frame, outer-joined
    with the dependent variable (and the weights, when given), and only
    the rows in which every column holds a value are kept.

    Parameters
    ----------
    lhs : Series or array-like
        Dependent variable in the regression. When it is not a Series it
        is wrapped in one using ``rhs.index``, so in that case ``rhs``
        must expose an ``index`` and have the same length as ``lhs``.
    rhs : dict, whose values are Series, DataFrame, or dict
        Explanatory variables of the regression. Passed through
        ``_combine_rhs`` (presumably yielding a single DataFrame --
        confirm against that helper).
    weights : array-like, optional
        1d array of weights. If None, equivalent to an unweighted OLS.

    Returns
    -------
    filt_lhs : Series
        Dependent variable restricted to the complete rows.
    filt_rhs : DataFrame
        Explanatory variables restricted to the complete rows.
    filt_weights : Series or None
        Weights restricted to the complete rows, or None when no
        weights were supplied.
    pre_filt_rhs : DataFrame
        Combined explanatory variables with rows containing any missing
        value dropped, computed before joining with ``lhs``.
    index : Index
        Index of the joined frame before any row was removed.
    valid : ndarray of bool
        Mask over ``index`` marking the rows that were kept.

    Raises
    ------
    AssertionError
        If ``lhs`` is not a Series and its length differs from ``rhs``.
    """
    if not isinstance(lhs, Series):
        # A bare array has no labels of its own; it borrows them from rhs,
        # which is only meaningful when both have the same length.
        if len(lhs) != len(rhs):
            raise AssertionError("length of lhs must equal length of rhs")
        lhs = Series(lhs, index=rhs.index)

    rhs = _combine_rhs(rhs)
    y_frame = DataFrame({'__y__': lhs}, dtype=float)
    rhs_without_gaps = rhs.dropna(how='any')

    has_weights = weights is not None

    # Outer join keeps every label seen on either side; rows missing on one
    # side show up as NaN and are filtered out by the mask below.
    merged = rhs.join(y_frame, how='outer')
    if has_weights:
        merged['__weights__'] = weights

    # A row is usable only if its non-missing count equals the column count.
    n_columns = len(merged.columns)
    valid = (merged.count(axis=1) == n_columns).values
    index = merged.index
    kept = merged[valid]

    # Peel the helper columns back off so that only regressors remain.
    filt_weights = None
    if has_weights:
        filt_weights = kept.pop('__weights__')
        if hasattr(filt_weights, 'to_dense'):
            filt_weights = filt_weights.to_dense()

    filt_lhs = kept.pop('__y__').to_dense()
    filt_rhs = kept.to_dense()
    pre_filt_rhs = rhs_without_gaps.to_dense()

    return filt_lhs, filt_rhs, filt_weights, pre_filt_rhs, index, valid
ols.py 文件源码
python
阅读 28
收藏 0
点赞 0
评论 0
评论列表
文章目录