python类kruskal()的实例源码

stats.py 文件源码 项目:ISM2017 作者: ybayle 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def kruskal(data):
    """
https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kruskal.html
    """
    if len(data) == 2:
        statistic, pvalue = stats.kruskal(data[0], data[1])
    elif len(data) == 3:
        statistic, pvalue = stats.kruskal(data[0], data[1], data[2])
    elif len(data) == 4:
        statistic, pvalue = stats.kruskal(data[0], data[1], data[2], data[3])
    else:
        utils.print_error("TODO kruskal manage more values")
    print("Kruskal Statistic " + str(statistic) + " and p-value " + str(pvalue))
    if pvalue > 0.05:
        # same
        return False
    else:
        # different
        return True
stats.py 文件源码 项目:ISM2017 作者: ybayle 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def main(stats_dir):
    utils.print_success("Statistical analysis")
    stats_file = listdir(utils.abs_path_dir(stats_dir))
    for filen in stats_file:
        utils.print_info(filen)
        data, names = read_data_2(stats_dir, filen)
        if assert_homoscedasticity(data):
            if anova(data):
                tukey(data, names)
            else:
                print("All means are the same")
        else:
            if kruskal(data):
                print("cf R")
                # Dunn
                # Conover-Iman
                # Dwass-Steel-Citchlow-Fligner
            else:
                print("All means are the same")
tableone.py 文件源码 项目:tableone 作者: tompollard 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def _p_test(self,v,grouped_data,is_continuous,is_categorical,
            is_normal,min_observed,catlevels,
            pval=np.nan,ptest='Not tested'):
        """
        Compute p value
        """

        # do not test if any sub-group has no observations
        if min_observed == 0:
            warnings.warn('No p-value was computed for {} due to the low number of observations.'.format(v))
            return pval,ptest

        # continuous
        if is_continuous and is_normal:
            # normally distributed
            ptest = 'One-way ANOVA'
            test_stat, pval = stats.f_oneway(*grouped_data)
        elif is_continuous and not is_normal:
            # non-normally distributed
            ptest = 'Kruskal-Wallis'
            test_stat, pval = stats.kruskal(*grouped_data)
        # categorical
        elif is_categorical:
            # default to chi-squared
            ptest = 'Chi-squared'
            chi2, pval, dof, expected = stats.chi2_contingency(grouped_data)
            # if any expected cell counts are < 5, chi2 may not be valid
            # if this is a 2x2, switch to fisher exact
            if expected.min() < 5:
                if grouped_data.shape == (2,2):
                    ptest = 'Fisher''s exact'
                    oddsratio, pval = stats.fisher_exact(grouped_data)
                else:
                    ptest = 'Chi-squared (warning: expected count < 5)'
                    warnings.warn('No p-value was computed for {} due to the low number of observations.'.format(v))

        return pval,ptest
statisticTest.py 文件源码 项目:TFG 作者: alu0100505078 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def kruskalWallisTest(nAlgorithms,hyperVolumeList):
    #stats.kruskal(algoritmo1, algoritmo2)
    print 'entre a kruskalwallis'

    kruskal = []
    for i in range(nAlgorithms):
        algorithm = np.array(hyperVolumeList[i])
        j =i+1
        while j < nAlgorithms:
            algorithmCompare = np.array(hyperVolumeList[j])
            kruskalTest = stats.kruskal(algorithm, algorithmCompare)
            kruskal.append(kruskalTest)
            print kruskal
            j +=1

    return kruskal
basics_backup.py 文件源码 项目:brainpipe 作者: EtienneCmb 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def _evalstat(x, bsl, meth, n_perm, metric, maxstat, tail):
    """Statistical evaluation of features

    [x] = [xn] = (nFce, npts, nTrials)
    [bsl] = (nFce, nTrials)
    """
    # Get shape of xF :
    nf, npts, nt = x.shape
    pvalues = np.ones((nf, npts))

    # Permutations :
    if meth == 'permutation':
        perm = perm_swaparray(a, b, n_perm=200, axis=-1, rndstate=0)
        from brainpipe.xPOO.stats import permutation
        # Pre-define permutations :
        pObj = permutation(n_perm)
        perm = np.zeros((n_perm, nf, npts))
        # For each permutation :
        for p in range(n_perm):
            # Get 1D iterations :
            ite = product(range(nf), range(npts))
            permT = np.random.permutation(2*nt)
            for f, pts in ite:
                bs, xs = bsl[f, :], x[f, pts, :]
                # Reshape data :
                subX = np.vstack((bsl[f, :], x[f, pts, :])).reshape(2*nt,)
                # Shuffle data :
                subX = subX[permT].reshape(nt, 2)
                # Normalize data :
                subX = normalize(subX[:, 0], subX[:, 1], norm=norm)
                # Get mean of data :
                perm[p, f, pts] = np.mean(subX)
        # Get final pvalues :
        pvalues = pObj.perm2p(np.mean(xn, 2), perm, tail=tail,
                              maxstat=maxstat)

    # Wilcoxon test :
    elif meth == 'wilcoxon':
        from scipy.stats import wilcoxon
        # Get iterations :
        ite = product(range(nf), range(npts))
        # Compute wilcoxon :
        for k, i in ite:
            _, pvalues[k, i] = wilcoxon(x[k, i, :], bsl[k, :])

    # Kruskal-Wallis :
    elif meth == 'kruskal':
        from scipy.stats import kruskal
        # Get iterations :
        ite = product(range(nf), range(npts))
        # Compute Kruskal-Wallis :
        for k, i in ite:
            _, pvalues[k, i] = kruskal(x[k, i, :], bsl[k, :])

    return pvalues
pca.py 文件源码 项目:ECoG-ClusterFlow 作者: sugeerth 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def snr(M, list1, list2, threshold = None, significance = False):
    """

    Performs a signal-to-noise ratio test on M, assuming samples are in rows and genes are in columns

        list1       - List of row indices for first group
        list2       - List of row indices for second group
        threshold   - Minimum SNR ratio to report
        significance - Run kruskal ttest (requires scipy)

    Returns a reverse-ordered list of (ratio, index, mean1, mean2, pvalue) tuples, where index is the column index of the gene,
    and mean1 and mean2 correspond to the mean for that particular gene in list1 and list2, respectively.  pvalue is blank if significance
    is False.

    If signifance is true (and scipy is installed) a pvalue will be assigned. Be ware this increases processing
    time significantly (ha).

    """

    ratios = []

    N1 = M.take(tuple(list1), 0)
    N2 = M.take(tuple(list2), 0)

    N1mean, N2mean = N1.mean(0), N2.mean(0)
    means = numpy.abs(N1mean - N2mean)
    stds  = N1.std(0) + N2.std(0)

    if stds.all():
        rats = means / stds
    else:
        rats = numpy.zeros((len(means),), dtype=numpy.float32)
        for i in xrange(len(stds)):
            if stds[i]:
                rats[i] = means[i] / stds[i]

    for i in xrange(M.shape[1]):

        rat = rats[i]
        mean1, mean2 = N1mean[i], N2mean[i]

        if threshold is None or rat >= threshold:

            if PVAL and significance:
                pval = st.kruskal(N1[:,i], N2[:,i])[1]
            else:
                pval = ''

            ratios.append( (rat, i, mean1, mean2, pval) )

    ratios.sort(reverse=True)

    return ratios


问题


面经


文章

微信
公众号

扫码关注公众号