model.py 文件源码-python代码片段

model.py 文件源码

python

阅读 28 收藏 0 点赞 0 评论 0

项目：DriverPower 作者: smshuai 项目源码文件源码

def run_rndlasso(X, y, alpha,
                 n_resampling=500, sample_fraction=0.1, n_threads=1):
    """Fit a Randomized Lasso and return its per-feature scores.

    The response is built from ``y`` as
    ``logit((nMut + 0.5) / (length * N))`` and regressed on ``X`` with
    ``RandomizedLasso``.

    Args:
        X (np.array): scaled feature matrix.
        y (pd.df): response table; the columns ``nMut``, ``length`` and
            ``N`` are read here.
        alpha (float): regularisation parameter (trained from lassoCV).
        n_resampling (int): number of resampling rounds.
        sample_fraction (float): fraction of data used in each round.
        n_threads (int): forwarded to the estimator as ``n_jobs``.

    Returns:
        np.array: feature importance scores (the fitted estimator's
        ``scores_`` attribute).

    """
    message = 'Implementing Randomized Lasso with alpha={}, n_resampling={} and sample_fraction={}'
    logger.info(message.format(alpha, n_resampling, sample_fraction))

    # Logit-transformed response. The 0.5 added to the count presumably keeps
    # the ratio away from zero so the logit stays finite -- confirm with the
    # data provider.
    rate = (y.nMut + 0.5) / (y.length * y.N)
    response = logit(rate)

    # Estimator settings gathered in one place; only the first three and
    # n_jobs are caller-controlled, the rest are fixed for this pipeline.
    lasso_settings = dict(
        alpha=alpha,
        n_resampling=n_resampling,
        sample_fraction=sample_fraction,
        selection_threshold=1e-3,
        max_iter=3000,
        normalize=False,
        n_jobs=n_threads,
    )
    estimator = RandomizedLasso(**lasso_settings)
    return estimator.fit(X, response).scores_