def _fit_hdd_only(df, weighted=False):
    """Pick the best single-regressor ``upd ~ HDD_<bp>`` model from ``df``.

    Every column of ``df`` whose name starts with ``HDD`` is a candidate; the
    text after the fourth character of the name is the candidate label ``bp``
    (``'HDD_60'`` -> ``'60'``).  A candidate is skipped without fitting when
    its column has fewer than 10 strictly positive entries or sums (ignoring
    NaN) to less than 20.

    Each remaining candidate is fitted via ``smf`` (a module-level name,
    presumably ``statsmodels.formula.api`` -- confirm against the file's
    imports).  A fit replaces the current best only if all of these hold:

    * its adjusted R-squared exceeds the best so far,
    * its ``Intercept`` coefficient is >= 0,
    * its ``HDD_<bp>`` coefficient is >= 0,
    * the p-value of the ``HDD_<bp>`` coefficient is < 0.1.

    Args:
        df: Table exposing ``.columns`` and column access by name.  Must hold
            the ``upd`` response referenced by the formula, the ``HDD_<bp>``
            columns, and ``ndays`` when ``weighted`` is true.
        weighted: If true, fit with ``smf.wls`` using ``df['ndays']`` as
            weights; otherwise fit with ``smf.ols``.

    Returns:
        Tuple ``(best_formula, best_mod, best_res, best_rsquared,
        hdd_qualified, best_bp)``:

        * a candidate qualified: its formula string, model, fit result,
          adjusted R-squared, ``True`` and ``int(bp)``;
        * no candidate qualified: ``(None, None, None, -9e9, False, None)``;
        * any ``Exception`` was raised while evaluating candidates:
          ``(None, None, None, 0, False, None)`` -- the whole search is
          discarded, including candidates that had already qualified.

    Raises:
        Only non-``Exception`` ``BaseException`` subclasses (for example
        ``KeyboardInterrupt`` or ``SystemExit``) propagate out of the search
        loop; these are deliberately no longer swallowed.
    """
    import logging

    bps = [name[4:] for name in df.columns if name[:3] == 'HDD']
    best_bp, best_rsquared, best_mod, best_res = None, -9e9, None, None
    best_formula, hdd_qualified = None, False

    try:  # TODO: fix big try block anti-pattern
        for bp in bps:
            hdd_column = 'HDD_' + bp
            candidate_hdd_formula = 'upd ~ ' + hdd_column
            hdd_values = df[hdd_column]

            # Too few heating observations to support a regression.
            if np.nansum(hdd_values > 0) < 10 or np.nansum(hdd_values) < 20:
                continue

            if weighted:
                candidate_hdd_mod = smf.wls(formula=candidate_hdd_formula,
                                            data=df, weights=df['ndays'])
            else:
                candidate_hdd_mod = smf.ols(formula=candidate_hdd_formula,
                                            data=df)
            candidate_hdd_res = candidate_hdd_mod.fit()
            candidate_hdd_rsquared = candidate_hdd_res.rsquared_adj

            if (candidate_hdd_rsquared > best_rsquared and
                    candidate_hdd_res.params['Intercept'] >= 0 and
                    candidate_hdd_res.params[hdd_column] >= 0 and
                    candidate_hdd_res.pvalues[hdd_column] < 0.1):
                best_bp, best_rsquared = int(bp), candidate_hdd_rsquared
                best_mod, best_res = candidate_hdd_mod, candidate_hdd_res
                hdd_qualified = True
                best_formula = candidate_hdd_formula
    except Exception:  # TODO: catch specific error
        # Best-effort contract: a failing fit means "no HDD model", not a
        # crash.  ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt / SystemExit through.
        logging.getLogger(__name__).debug(
            'HDD-only model search failed; reporting no qualified model',
            exc_info=True)
        best_rsquared, hdd_qualified = 0, False
        best_formula, best_mod, best_res = None, None, None
        best_bp = None

    return (best_formula, best_mod, best_res, best_rsquared, hdd_qualified,
            best_bp)
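# 评论列表 (comment list) -- web-page navigation residue, not part of the code
# 文章目录 (table of contents) -- web-page navigation residue, not part of the code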