def _fit_cdd_only(df, weighted=False):
    """Pick the best single-regressor ``upd ~ CDD_<bp>`` model over all breakpoints.

    Every column of ``df`` whose name starts with ``'CDD_'`` is treated as a
    candidate regressor; the text after the prefix is the breakpoint label.
    A candidate is skipped when its column has fewer than 10 positive entries
    or sums to less than 20 (NaNs ignored). Remaining candidates are fitted
    and the one with the highest adjusted R-squared is kept, provided its
    intercept and slope are both non-negative and the slope p-value is
    below 0.1.

    Args:
        df: Table exposing ``columns`` and column lookup by name. The formula
            references a ``upd`` column, and ``ndays`` is read when
            ``weighted`` is true (presumably a pandas DataFrame -- confirm
            against callers).
        weighted: When true, fit with ``smf.wls`` using ``df['ndays']`` as
            weights; otherwise fit with ``smf.ols``.

    Returns:
        Tuple ``(best_formula, best_mod, best_res, best_rsquared,
        cdd_qualified, best_bp)``. When no candidate qualifies the tuple is
        ``(None, None, None, -9e9, False, None)``. When any exception is
        raised during the search, all candidates are discarded and the tuple
        is ``(None, None, None, 0, False, None)``.
    """
    prefix = 'CDD_'
    # Match the full 'CDD_' prefix: checking only 'CDD' while stripping four
    # characters turned columns such as 'CDD' into bogus breakpoints whose
    # lookup failure discarded every valid candidate.
    bps = [col[len(prefix):] for col in df.columns
           if isinstance(col, str) and col.startswith(prefix)]

    best_bp, best_rsquared, best_mod, best_res = None, -9e9, None, None
    best_formula, cdd_qualified = None, False
    try:  # TODO: fix big try block anti-pattern
        for bp in bps:
            column = prefix + bp
            candidate_cdd_formula = 'upd ~ ' + column
            values = df[column]
            # Too few positive entries or too little total: skip this
            # breakpoint without fitting.
            if np.nansum(values > 0) < 10 or np.nansum(values) < 20:
                continue
            if weighted:
                candidate_cdd_mod = smf.wls(formula=candidate_cdd_formula,
                                            data=df, weights=df['ndays'])
            else:
                candidate_cdd_mod = smf.ols(formula=candidate_cdd_formula,
                                            data=df)
            candidate_cdd_res = candidate_cdd_mod.fit()
            candidate_cdd_rsquared = candidate_cdd_res.rsquared_adj
            if (candidate_cdd_rsquared > best_rsquared and
                    candidate_cdd_res.params['Intercept'] >= 0 and
                    candidate_cdd_res.params[column] >= 0 and
                    candidate_cdd_res.pvalues[column] < 0.1):
                best_bp, best_rsquared = int(bp), candidate_cdd_rsquared
                best_mod, best_res = candidate_cdd_mod, candidate_cdd_res
                cdd_qualified = True
                best_formula = candidate_cdd_formula
    except Exception:  # TODO: catch specific error
        # Best-effort: any failure invalidates the whole search. Catching
        # Exception rather than using a bare except lets KeyboardInterrupt
        # and SystemExit propagate.
        best_rsquared, cdd_qualified = 0, False
        best_formula, best_mod, best_res = None, None, None
        best_bp = None
    return best_formula, best_mod, best_res, best_rsquared, cdd_qualified, best_bp
评论列表
文章目录