def run_glm(X, y, model_name):
    """Train the binomial/negative binomial GLM.

    A column of ones is appended to ``X`` as its last column to act as the
    intercept before the model is built and fitted.

    Args:
        X (np.array): scaled X; one row per observation.
        y (pd.df): four columns response table. The columns ``nMut``,
            ``length`` and ``N`` are the ones read here.
        model_name (str): either ``'Binomial'`` or ``'NegativeBinomial'``.

    Returns:
        sm.model: the object returned by ``sm.GLM(...).fit()``.

    Raises:
        SystemExit: with status 1, after writing a message to stderr, when
            ``model_name`` is not one of the two supported names.
    """
    # Reject an unsupported name up front so no work is done before exiting.
    if model_name not in ('Binomial', 'NegativeBinomial'):
        # Newline-terminated so the message does not run into later output.
        sys.stderr.write(
            'Unknown GLM name {}. Must be Binomial or NegativeBinomial\n'.format(model_name))
        sys.exit(1)

    # Add const manually. sm.add_constant cannot add 1 for shape (1, n)
    X = np.c_[X, np.ones(X.shape[0])]

    if model_name == 'Binomial':
        logger.info('Building binomial GLM')
        # make two columns response (# success, # failure)
        y_binom = np.zeros((y.shape[0], 2), dtype=np.int_)
        y_binom[:, 0] = y.nMut
        y_binom[:, 1] = y.length * y.N - y.nMut
        glm = sm.GLM(y_binom, X, family=sm.families.Binomial())
    else:
        logger.info('Building negative binomial GLM')
        # use nMut as response and length*N as exposure
        # NOTE(review): the +1 presumably keeps the exposure strictly
        # positive when length*N is 0 -- confirm with the model author.
        exposure = y.length.values * y.N.values + 1
        glm = sm.GLM(y.nMut.values, X,
                     family=sm.families.NegativeBinomial(),
                     exposure=exposure)

    return glm.fit()
评论列表
文章目录