def kruskal(data):
"""
https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kruskal.html
"""
if len(data) == 2:
statistic, pvalue = stats.kruskal(data[0], data[1])
elif len(data) == 3:
statistic, pvalue = stats.kruskal(data[0], data[1], data[2])
elif len(data) == 4:
statistic, pvalue = stats.kruskal(data[0], data[1], data[2], data[3])
else:
utils.print_error("TODO kruskal manage more values")
print("Kruskal Statistic " + str(statistic) + " and p-value " + str(pvalue))
if pvalue > 0.05:
# same
return False
else:
# different
return True
python类kruskal()的实例源码
def main(stats_dir):
utils.print_success("Statistical analysis")
stats_file = listdir(utils.abs_path_dir(stats_dir))
for filen in stats_file:
utils.print_info(filen)
data, names = read_data_2(stats_dir, filen)
if assert_homoscedasticity(data):
if anova(data):
tukey(data, names)
else:
print("All means are the same")
else:
if kruskal(data):
print("cf R")
# Dunn
# Conover-Iman
# Dwass-Steel-Citchlow-Fligner
else:
print("All means are the same")
def _p_test(self,v,grouped_data,is_continuous,is_categorical,
is_normal,min_observed,catlevels,
pval=np.nan,ptest='Not tested'):
"""
Compute p value
"""
# do not test if any sub-group has no observations
if min_observed == 0:
warnings.warn('No p-value was computed for {} due to the low number of observations.'.format(v))
return pval,ptest
# continuous
if is_continuous and is_normal:
# normally distributed
ptest = 'One-way ANOVA'
test_stat, pval = stats.f_oneway(*grouped_data)
elif is_continuous and not is_normal:
# non-normally distributed
ptest = 'Kruskal-Wallis'
test_stat, pval = stats.kruskal(*grouped_data)
# categorical
elif is_categorical:
# default to chi-squared
ptest = 'Chi-squared'
chi2, pval, dof, expected = stats.chi2_contingency(grouped_data)
# if any expected cell counts are < 5, chi2 may not be valid
# if this is a 2x2, switch to fisher exact
if expected.min() < 5:
if grouped_data.shape == (2,2):
ptest = 'Fisher''s exact'
oddsratio, pval = stats.fisher_exact(grouped_data)
else:
ptest = 'Chi-squared (warning: expected count < 5)'
warnings.warn('No p-value was computed for {} due to the low number of observations.'.format(v))
return pval,ptest
def kruskalWallisTest(nAlgorithms,hyperVolumeList):
#stats.kruskal(algoritmo1, algoritmo2)
print 'entre a kruskalwallis'
kruskal = []
for i in range(nAlgorithms):
algorithm = np.array(hyperVolumeList[i])
j =i+1
while j < nAlgorithms:
algorithmCompare = np.array(hyperVolumeList[j])
kruskalTest = stats.kruskal(algorithm, algorithmCompare)
kruskal.append(kruskalTest)
print kruskal
j +=1
return kruskal
def _evalstat(x, bsl, meth, n_perm, metric, maxstat, tail):
"""Statistical evaluation of features
[x] = [xn] = (nFce, npts, nTrials)
[bsl] = (nFce, nTrials)
"""
# Get shape of xF :
nf, npts, nt = x.shape
pvalues = np.ones((nf, npts))
# Permutations :
if meth == 'permutation':
perm = perm_swaparray(a, b, n_perm=200, axis=-1, rndstate=0)
from brainpipe.xPOO.stats import permutation
# Pre-define permutations :
pObj = permutation(n_perm)
perm = np.zeros((n_perm, nf, npts))
# For each permutation :
for p in range(n_perm):
# Get 1D iterations :
ite = product(range(nf), range(npts))
permT = np.random.permutation(2*nt)
for f, pts in ite:
bs, xs = bsl[f, :], x[f, pts, :]
# Reshape data :
subX = np.vstack((bsl[f, :], x[f, pts, :])).reshape(2*nt,)
# Shuffle data :
subX = subX[permT].reshape(nt, 2)
# Normalize data :
subX = normalize(subX[:, 0], subX[:, 1], norm=norm)
# Get mean of data :
perm[p, f, pts] = np.mean(subX)
# Get final pvalues :
pvalues = pObj.perm2p(np.mean(xn, 2), perm, tail=tail,
maxstat=maxstat)
# Wilcoxon test :
elif meth == 'wilcoxon':
from scipy.stats import wilcoxon
# Get iterations :
ite = product(range(nf), range(npts))
# Compute wilcoxon :
for k, i in ite:
_, pvalues[k, i] = wilcoxon(x[k, i, :], bsl[k, :])
# Kruskal-Wallis :
elif meth == 'kruskal':
from scipy.stats import kruskal
# Get iterations :
ite = product(range(nf), range(npts))
# Compute Kruskal-Wallis :
for k, i in ite:
_, pvalues[k, i] = kruskal(x[k, i, :], bsl[k, :])
return pvalues
def snr(M, list1, list2, threshold = None, significance = False):
"""
Performs a signal-to-noise ratio test on M, assuming samples are in rows and genes are in columns
list1 - List of row indices for first group
list2 - List of row indices for second group
threshold - Minimum SNR ratio to report
significance - Run kruskal ttest (requires scipy)
Returns a reverse-ordered list of (ratio, index, mean1, mean2, pvalue) tuples, where index is the column index of the gene,
and mean1 and mean2 correspond to the mean for that particular gene in list1 and list2, respectively. pvalue is blank if significance
is False.
If signifance is true (and scipy is installed) a pvalue will be assigned. Be ware this increases processing
time significantly (ha).
"""
ratios = []
N1 = M.take(tuple(list1), 0)
N2 = M.take(tuple(list2), 0)
N1mean, N2mean = N1.mean(0), N2.mean(0)
means = numpy.abs(N1mean - N2mean)
stds = N1.std(0) + N2.std(0)
if stds.all():
rats = means / stds
else:
rats = numpy.zeros((len(means),), dtype=numpy.float32)
for i in xrange(len(stds)):
if stds[i]:
rats[i] = means[i] / stds[i]
for i in xrange(M.shape[1]):
rat = rats[i]
mean1, mean2 = N1mean[i], N2mean[i]
if threshold is None or rat >= threshold:
if PVAL and significance:
pval = st.kruskal(N1[:,i], N2[:,i])[1]
else:
pval = ''
ratios.append( (rat, i, mean1, mean2, pval) )
ratios.sort(reverse=True)
return ratios