def dispersion_test(yhat, y, k=100):
""" Implement the regression based dispersion test with k re-sampling.
Args:
yhat (np.array): predicted mutation count
y (np.array): observed mutation count
k (int):
Returns:
float, float: p-value, theta
"""
theta = 0
pval = 0
for i in range(k):
y_sub, yhat_sub = resample(y, yhat, random_state=i)
# (np.power((y - yhat), 2) - y) / yhat for Poisson regression
aux = (np.power((y_sub - yhat_sub), 2) - yhat_sub) / yhat_sub
mod = sm.OLS(aux, yhat_sub)
res = mod.fit()
theta += res.params[0]
pval += res.pvalues[0]
theta = theta/k
pval = pval/k
return pval, theta
评论列表
文章目录