def run_rndlasso(X, y, alpha,
                 n_resampling=500, sample_fraction=0.1, n_threads=1):
    """Fit a RandomizedLasso model on a logit-transformed response.

    The response is computed from three columns of ``y`` as
    ``logit((nMut + 0.5) / (length * N))`` and a ``RandomizedLasso``
    estimator is fitted against it.

    Args:
        X (np.array): design matrix; the caller is expected to have
            scaled it already (``normalize=False`` is passed to the
            estimator).
        y (pd.df): response table; only the ``nMut``, ``length`` and
            ``N`` columns are read here.
        alpha (float): regularization strength handed to the estimator
            (per the original notes, trained with lassoCV).
        n_resampling (int): number of resampling rounds.
        sample_fraction (float): fraction of the data used in each
            resampling round.
        n_threads (int): forwarded to the estimator as ``n_jobs``.

    Returns:
        The ``scores_`` attribute of the fitted estimator (per the
        original notes, an np.array of feature importance scores).
    """
    message = 'Implementing Randomized Lasso with alpha={}, n_resampling={} and sample_fraction={}'.format(
        alpha, n_resampling, sample_fraction)
    logger.info(message)

    # Build the logit response. The 0.5 added to the numerator looks like
    # a pseudo-count so that rows with nMut == 0 do not produce a zero
    # ratio -- NOTE(review): confirm intent with the author.
    ratio = (y.nMut + 0.5) / (y.length * y.N)
    response = logit(ratio)

    # Everything except alpha, the resampling controls and n_jobs is fixed.
    estimator_options = dict(
        alpha=alpha,
        n_resampling=n_resampling,
        sample_fraction=sample_fraction,
        selection_threshold=1e-3,
        max_iter=3000,
        normalize=False,
        n_jobs=n_threads,
    )
    selector = RandomizedLasso(**estimator_options)

    # Read the scores from whatever fit() returns, as the original did.
    return selector.fit(X, response).scores_
评论列表
文章目录