python类kruskal()的实例源码-面圈网

stats.py 文件源码项目：ISM2017 作者: ybayle 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def kruskal(data):
    """
https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kruskal.html
    """
    if len(data) == 2:
        statistic, pvalue = stats.kruskal(data[0], data[1])
    elif len(data) == 3:
        statistic, pvalue = stats.kruskal(data[0], data[1], data[2])
    elif len(data) == 4:
        statistic, pvalue = stats.kruskal(data[0], data[1], data[2], data[3])
    else:
        utils.print_error("TODO kruskal manage more values")
    print("Kruskal Statistic " + str(statistic) + " and p-value " + str(pvalue))
    if pvalue > 0.05:
        # same
        return False
    else:
        # different
        return True

stats.py 文件源码项目：ISM2017 作者: ybayle 项目源码文件源码阅读 37 收藏 0 点赞 0 评论 0

def main(stats_dir):
    utils.print_success("Statistical analysis")
    stats_file = listdir(utils.abs_path_dir(stats_dir))
    for filen in stats_file:
        utils.print_info(filen)
        data, names = read_data_2(stats_dir, filen)
        if assert_homoscedasticity(data):
            if anova(data):
                tukey(data, names)
            else:
                print("All means are the same")
        else:
            if kruskal(data):
                print("cf R")
                # Dunn
                # Conover-Iman
                # Dwass-Steel-Citchlow-Fligner
            else:
                print("All means are the same")

tableone.py 文件源码项目：tableone 作者: tompollard 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def _p_test(self,v,grouped_data,is_continuous,is_categorical,
            is_normal,min_observed,catlevels,
            pval=np.nan,ptest='Not tested'):
        """
        Compute p value
        """

        # do not test if any sub-group has no observations
        if min_observed == 0:
            warnings.warn('No p-value was computed for {} due to the low number of observations.'.format(v))
            return pval,ptest

        # continuous
        if is_continuous and is_normal:
            # normally distributed
            ptest = 'One-way ANOVA'
            test_stat, pval = stats.f_oneway(*grouped_data)
        elif is_continuous and not is_normal:
            # non-normally distributed
            ptest = 'Kruskal-Wallis'
            test_stat, pval = stats.kruskal(*grouped_data)
        # categorical
        elif is_categorical:
            # default to chi-squared
            ptest = 'Chi-squared'
            chi2, pval, dof, expected = stats.chi2_contingency(grouped_data)
            # if any expected cell counts are < 5, chi2 may not be valid
            # if this is a 2x2, switch to fisher exact
            if expected.min() < 5:
                if grouped_data.shape == (2,2):
                    ptest = 'Fisher''s exact'
                    oddsratio, pval = stats.fisher_exact(grouped_data)
                else:
                    ptest = 'Chi-squared (warning: expected count < 5)'
                    warnings.warn('No p-value was computed for {} due to the low number of observations.'.format(v))

        return pval,ptest

statisticTest.py 文件源码项目：TFG 作者: alu0100505078 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def kruskalWallisTest(nAlgorithms,hyperVolumeList):
    #stats.kruskal(algoritmo1, algoritmo2)
    print 'entre a kruskalwallis'

    kruskal = []
    for i in range(nAlgorithms):
        algorithm = np.array(hyperVolumeList[i])
        j =i+1
        while j < nAlgorithms:
            algorithmCompare = np.array(hyperVolumeList[j])
            kruskalTest = stats.kruskal(algorithm, algorithmCompare)
            kruskal.append(kruskalTest)
            print kruskal
            j +=1

    return kruskal

basics_backup.py 文件源码项目：brainpipe 作者: EtienneCmb 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def _evalstat(x, bsl, meth, n_perm, metric, maxstat, tail):
    """Statistical evaluation of features

    [x] = [xn] = (nFce, npts, nTrials)
    [bsl] = (nFce, nTrials)
    """
    # Get shape of xF :
    nf, npts, nt = x.shape
    pvalues = np.ones((nf, npts))

    # Permutations :
    if meth == 'permutation':
        perm = perm_swaparray(a, b, n_perm=200, axis=-1, rndstate=0)
        from brainpipe.xPOO.stats import permutation
        # Pre-define permutations :
        pObj = permutation(n_perm)
        perm = np.zeros((n_perm, nf, npts))
        # For each permutation :
        for p in range(n_perm):
            # Get 1D iterations :
            ite = product(range(nf), range(npts))
            permT = np.random.permutation(2*nt)
            for f, pts in ite:
                bs, xs = bsl[f, :], x[f, pts, :]
                # Reshape data :
                subX = np.vstack((bsl[f, :], x[f, pts, :])).reshape(2*nt,)
                # Shuffle data :
                subX = subX[permT].reshape(nt, 2)
                # Normalize data :
                subX = normalize(subX[:, 0], subX[:, 1], norm=norm)
                # Get mean of data :
                perm[p, f, pts] = np.mean(subX)
        # Get final pvalues :
        pvalues = pObj.perm2p(np.mean(xn, 2), perm, tail=tail,
                              maxstat=maxstat)

    # Wilcoxon test :
    elif meth == 'wilcoxon':
        from scipy.stats import wilcoxon
        # Get iterations :
        ite = product(range(nf), range(npts))
        # Compute wilcoxon :
        for k, i in ite:
            _, pvalues[k, i] = wilcoxon(x[k, i, :], bsl[k, :])

    # Kruskal-Wallis :
    elif meth == 'kruskal':
        from scipy.stats import kruskal
        # Get iterations :
        ite = product(range(nf), range(npts))
        # Compute Kruskal-Wallis :
        for k, i in ite:
            _, pvalues[k, i] = kruskal(x[k, i, :], bsl[k, :])

    return pvalues

pca.py 文件源码项目：ECoG-ClusterFlow 作者: sugeerth 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def snr(M, list1, list2, threshold = None, significance = False):
    """

    Performs a signal-to-noise ratio test on M, assuming samples are in rows and genes are in columns

        list1       - List of row indices for first group
        list2       - List of row indices for second group
        threshold   - Minimum SNR ratio to report
        significance - Run kruskal ttest (requires scipy)

    Returns a reverse-ordered list of (ratio, index, mean1, mean2, pvalue) tuples, where index is the column index of the gene,
    and mean1 and mean2 correspond to the mean for that particular gene in list1 and list2, respectively.  pvalue is blank if significance
    is False.

    If signifance is true (and scipy is installed) a pvalue will be assigned. Be ware this increases processing
    time significantly (ha).

    """

    ratios = []

    N1 = M.take(tuple(list1), 0)
    N2 = M.take(tuple(list2), 0)

    N1mean, N2mean = N1.mean(0), N2.mean(0)
    means = numpy.abs(N1mean - N2mean)
    stds  = N1.std(0) + N2.std(0)

    if stds.all():
        rats = means / stds
    else:
        rats = numpy.zeros((len(means),), dtype=numpy.float32)
        for i in xrange(len(stds)):
            if stds[i]:
                rats[i] = means[i] / stds[i]

    for i in xrange(M.shape[1]):

        rat = rats[i]
        mean1, mean2 = N1mean[i], N2mean[i]

        if threshold is None or rat >= threshold:

            if PVAL and significance:
                pval = st.kruskal(N1[:,i], N2[:,i])[1]
            else:
                pval = ''

            ratios.append( (rat, i, mean1, mean2, pval) )

    ratios.sort(reverse=True)

    return ratios