def run_lasso(X, y, max_iter=3000, cv=5, n_threads=1, n_subsample=300000):
    """Pick the Lasso penalty (alpha) with scikit-learn's LassoCV.

    The response is the logit of a smoothed per-row rate,
    ``(nMut + 0.5) / (length * N)``. The model is fitted on a random
    sub-sample of rows, drawn without replacement, to bound the cost of
    cross-validation.

    Args:
        X (np.array): scaled feature matrix, one row per row of ``y``.
        y (pd.df): response table; the ``nMut``, ``length`` and ``N``
            columns are read here.
        max_iter (int): max iteration passed to LassoCV.
        cv (int): CV fold.
        n_threads (int): Number of threads to use for parallel computing
            (forwarded as ``n_jobs``).
        n_subsample (int): maximum number of rows used for the fit. When
            the data has fewer rows, all of them are used.

    Returns:
        float: the alpha value selected by cross-validation.
    """
    logger.info('Implementing LassoCV with {} iter. and {}-fold CV'.format(max_iter, cv))

    # Logit response. The +0.5 keeps the ratio strictly positive when
    # nMut is 0, so the logit stays finite.
    # Convert to a plain ndarray: the indices drawn below are positional,
    # and indexing a pandas Series with them would be label-based, which
    # mis-aligns X and y whenever the table's index is not 0..n-1.
    y_logit = np.asarray(logit((y.nMut + 0.5) / (y.length * y.N)))

    # Sub-sample rows. np.random.choice(..., replace=False) raises if asked
    # for more items than exist, so cap the request at the row count.
    n_rows = y_logit.shape[0]
    n_use = min(n_rows, n_subsample)
    use_ix = np.random.choice(n_rows, n_use, replace=False)
    Xsub = X[use_ix, :]
    ysub = y_logit[use_ix]

    # copy_X=False lets scikit-learn work on Xsub without another copy.
    # NOTE(review): assumes X is a NumPy array, so Xsub is already a
    # fancy-indexed copy and the caller's X is not touched - confirm.
    reg = LassoCV(max_iter=max_iter, cv=cv, copy_X=False, n_jobs=n_threads)
    lassocv = reg.fit(Xsub, ysub)
    logger.info('LassoCV alpha = {}'.format(lassocv.alpha_))
    return lassocv.alpha_
# Comment list
# Table of contents