# caltrack_helpers.py -- source file (Python code snippet)

def _fit_cdd_only(df, weighted=False):
    """Pick the best single-regressor ``upd ~ CDD_<bp>`` model.

    Every column of ``df`` whose name starts with ``'CDD'`` contributes one
    candidate; the text after the four-character prefix (``'65'`` for
    ``'CDD_65'``) is taken as the balance point ``bp``.  A candidate is
    fitted only when its column has at least 10 strictly positive entries
    and a NaN-ignoring sum of at least 20.  Among the fitted candidates the
    winner is the one with the highest adjusted R-squared that also has a
    non-negative intercept, a non-negative CDD coefficient, and a CDD
    p-value below 0.1.

    Parameters
    ----------
    df : DataFrame-like
        Must expose ``columns`` and column access by name.  The formula
        references an ``upd`` column as the response and one ``CDD_<bp>``
        column as the regressor (CDD presumably means cooling degree
        days -- confirm against the caller).  ``ndays`` is read only when
        ``weighted`` is true.
    weighted : bool, optional
        If true, fit with ``smf.wls`` using ``df['ndays']`` as weights;
        otherwise fit with ``smf.ols``.

    Returns
    -------
    tuple
        ``(best_formula, best_mod, best_res, best_rsquared, cdd_qualified,
        best_bp)``.  When no candidate qualifies the result is
        ``(None, None, None, -9e9, False, None)``.  When any exception is
        raised while evaluating candidates the result is
        ``(None, None, None, 0, False, None)``.

    Notes
    -----
    The error handling is all-or-nothing: an exception on *any* candidate
    discards candidates that had already qualified.  This mirrors the
    long-standing behaviour and is kept for backward compatibility.
    """
    bps = [i[4:] for i in df.columns if i[:3] == 'CDD']
    best_bp, best_rsquared, best_mod, best_res = None, -9e9, None, None
    best_formula, cdd_qualified = None, False

    try:  # TODO: fix big try block anti-pattern
        for bp in bps:
            cdd_col = 'CDD_' + bp
            candidate_cdd_formula = 'upd ~ ' + cdd_col
            cdd_values = df[cdd_col]

            # Skip balance points with too little signal to regress on:
            # fewer than 10 positive rows, or a total below 20.
            if (np.nansum(cdd_values > 0) < 10) or \
               (np.nansum(cdd_values) < 20):
                continue

            if weighted:
                candidate_cdd_mod = smf.wls(formula=candidate_cdd_formula,
                                            data=df, weights=df['ndays'])
            else:
                candidate_cdd_mod = smf.ols(formula=candidate_cdd_formula,
                                            data=df)
            candidate_cdd_res = candidate_cdd_mod.fit()
            candidate_cdd_rsquared = candidate_cdd_res.rsquared_adj

            # Accept only an improving fit with non-negative intercept and
            # slope and a CDD term significant at the 0.1 level.
            if (candidate_cdd_rsquared > best_rsquared and
                    candidate_cdd_res.params['Intercept'] >= 0 and
                    candidate_cdd_res.params[cdd_col] >= 0 and
                    candidate_cdd_res.pvalues[cdd_col] < 0.1):
                best_bp, best_rsquared = int(bp), candidate_cdd_rsquared
                best_mod, best_res = candidate_cdd_mod, candidate_cdd_res
                cdd_qualified = True
                best_formula = candidate_cdd_formula
    except Exception:  # TODO: catch specific error
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit still propagate.  The fallback values (including
        # R-squared of 0 instead of the -9e9 sentinel) are unchanged.
        best_rsquared, cdd_qualified = 0, False
        best_formula, best_mod, best_res = None, None, None
        best_bp = None

    return best_formula, best_mod, best_res, best_rsquared, cdd_qualified, best_bp