def score_event(nx, ny, ncommon, barcode_frequencies, resamples=100):
"""
perform a resampling test, based on the number of fragments in each region
and the barcode frequency distribution (the latter because some barcodes
have more DNA in them than others)
"""
samples = []
for _ in range(resamples):
s1 = numpy.random.choice(len(barcode_frequencies), nx, replace=False, p=barcode_frequencies)
s2 = numpy.random.choice(len(barcode_frequencies), ny, replace=False, p=barcode_frequencies)
common = len(set(s1).intersection(set(s2)))
samples.append(common)
# make a one-sided one-sample Wilcoxon test
statistic, pvalue = stats.wilcoxon(numpy.array(samples)-ncommon)
if numpy.mean(samples) > ncommon:
pvalue = 1 - pvalue/2.0
else:
pvalue = pvalue/2.0
# result = r["wilcox.test"](samples, mu=ncommon, alternative="less")
# pvalue2 = result.rx2("p.value")[0]
# print "::::::", nx, ny, ncommon, (numpy.array(samples)>ncommon).sum(), pvalue, pvalue2
return pvalue
评论列表
文章目录