def snr(M, list1, list2, threshold = None, significance = False):
"""
Performs a signal-to-noise ratio test on M, assuming samples are in rows and genes are in columns
list1 - List of row indices for first group
list2 - List of row indices for second group
threshold - Minimum SNR ratio to report
significance - Run kruskal ttest (requires scipy)
Returns a reverse-ordered list of (ratio, index, mean1, mean2, pvalue) tuples, where index is the column index of the gene,
and mean1 and mean2 correspond to the mean for that particular gene in list1 and list2, respectively. pvalue is blank if significance
is False.
If signifance is true (and scipy is installed) a pvalue will be assigned. Be ware this increases processing
time significantly (ha).
"""
ratios = []
N1 = M.take(tuple(list1), 0)
N2 = M.take(tuple(list2), 0)
N1mean, N2mean = N1.mean(0), N2.mean(0)
means = numpy.abs(N1mean - N2mean)
stds = N1.std(0) + N2.std(0)
if stds.all():
rats = means / stds
else:
rats = numpy.zeros((len(means),), dtype=numpy.float32)
for i in xrange(len(stds)):
if stds[i]:
rats[i] = means[i] / stds[i]
for i in xrange(M.shape[1]):
rat = rats[i]
mean1, mean2 = N1mean[i], N2mean[i]
if threshold is None or rat >= threshold:
if PVAL and significance:
pval = st.kruskal(N1[:,i], N2[:,i])[1]
else:
pval = ''
ratios.append( (rat, i, mean1, mean2, pval) )
ratios.sort(reverse=True)
return ratios
评论列表
文章目录