python类f_oneway()的实例源码-面圈网

stats.py 文件源码项目：ISM2017 作者: ybayle 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def anova(data):
    """
    Run a one-way ANOVA over several groups of observations.

    Returns True if at least one group mean differs from the others at the
    0.05 significance level, False otherwise.
    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.f_oneway.html

    data: sequence of groups, each group being a sequence of numbers.
          Any number of groups >= 2 is accepted.

    Raises ValueError when fewer than two groups are supplied.
    """
    if len(data) < 2:
        raise ValueError("ANOVA requires at least two groups of values")
    # Unpacking handles any number of groups instead of one branch per count.
    statistic, pvalue = stats.f_oneway(*data)
    print("ANOVA Statistic " + str(statistic) + " and p-value " + str(pvalue))
    # bool() keeps the return a plain Python bool (a NaN p-value yields False).
    return bool(pvalue < 0.05)

tableone.py 文件源码项目：tableone 作者: tompollard 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def _p_test(self,v,grouped_data,is_continuous,is_categorical,
            is_normal,min_observed,catlevels,
            pval=np.nan,ptest='Not tested'):
        """
        Compute p value
        """

        # do not test if any sub-group has no observations
        if min_observed == 0:
            warnings.warn('No p-value was computed for {} due to the low number of observations.'.format(v))
            return pval,ptest

        # continuous
        if is_continuous and is_normal:
            # normally distributed
            ptest = 'One-way ANOVA'
            test_stat, pval = stats.f_oneway(*grouped_data)
        elif is_continuous and not is_normal:
            # non-normally distributed
            ptest = 'Kruskal-Wallis'
            test_stat, pval = stats.kruskal(*grouped_data)
        # categorical
        elif is_categorical:
            # default to chi-squared
            ptest = 'Chi-squared'
            chi2, pval, dof, expected = stats.chi2_contingency(grouped_data)
            # if any expected cell counts are < 5, chi2 may not be valid
            # if this is a 2x2, switch to fisher exact
            if expected.min() < 5:
                if grouped_data.shape == (2,2):
                    ptest = 'Fisher''s exact'
                    oddsratio, pval = stats.fisher_exact(grouped_data)
                else:
                    ptest = 'Chi-squared (warning: expected count < 5)'
                    warnings.warn('No p-value was computed for {} due to the low number of observations.'.format(v))

        return pval,ptest

anova.py 文件源码项目：pysciencedock 作者: Kitware 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def anova(data):
    """Run a one-way ANOVA per column, grouping rows by index level 1.

    Returns a DataFrame indexed by the columns of ``data`` with the F
    statistic in column 'f' and the p-value in column 'p'.

    Raises Exception when index level 1 has two or fewer distinct groups.
    """
    group_count = len(data.groupby(level=1))
    if not group_count > 2:
        raise Exception('ANOVA requires a secondary index with three or more values')

    results = []
    for col in data.columns:
        # one sample per value of the secondary index level
        samples = [values for _, values in data[col].groupby(level=1)]
        results.append(f_oneway(*samples))

    return pd.DataFrame(results, columns=['f', 'p'], index=data.columns)

calculate_p_value.py 文件源码项目：facial-emotion-detection-dl 作者: dllatas 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def main():
    """
    Print the p-values of a Wilcoxon rank-sum test and a one-way ANOVA
    comparing the two hard-coded samples x and y below.

    Earlier data kept for reference:
    1st phase
    top1 = [70.0, 71.1, 72.5, 70.8, 68.1, 71.9, 71.1, 71.3, 68.4, 70.2]
    top3 = [75.8, 78.4, 77.8, 77.7, 80.0, 77.8, 78.7, 76.4, 79.1, 77.3]
    2nd phase
    """
    x = [53.6, 54.5, 53.7, 52.7, 53.1, 55.5, 55.5, 52.8, 53.7, 52.7]
    y = [89.7, 89.1, 89.5, 88.7, 89.4, 88.6, 89.8, 89.5, 89.2, 89.7]
    # Compute the Wilcoxon rank-sum statistic for two samples.
    wilcoxon = stats.ranksums(x, y)
    anova = stats.f_oneway(x, y)
    # Index 1 of each result is the p-value. A single-argument print(...)
    # call behaves the same on Python 2 and Python 3.
    print("Wilcoxon: " + str(wilcoxon[1]) + "; ANOVA: " + str(anova[1]))

statisticTest.py 文件源码项目：TFG 作者: alu0100505078 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def anovaTest(nAlgorithms,hyperVolumeList):
    """Run a pairwise one-way ANOVA between every pair of algorithms.

    nAlgorithms: number of leading entries of hyperVolumeList to compare.
    hyperVolumeList: indexable collection holding one sequence of values
        per algorithm.

    Returns the list of stats.f_oneway results, one per unordered pair
    (i, j) with i < j, in order of increasing i and then j. The running
    list is printed after each comparison.
    """
    # Convert each sample once instead of once per pair.
    samples = [np.array(hyperVolumeList[i]) for i in range(nAlgorithms)]
    anova = []
    for i in range(nAlgorithms):
        for j in range(i + 1, nAlgorithms):
            anova.append(stats.f_oneway(samples[i], samples[j]))
            # Single-argument print(...) works on Python 2 and Python 3.
            print('esto es anova')
            print(anova)
    return anova

test_feature_select.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def test_f_oneway_vs_scipy_stats():
    """Check that the local f_oneway agrees with scipy.stats.f_oneway."""
    rng = np.random.RandomState(0)
    # Two random (10, 3) samples; the second is shifted by 1.
    first = rng.randn(10, 3)
    second = 1 + rng.randn(10, 3)

    expected = stats.f_oneway(first, second)
    actual = f_oneway(first, second)

    # Compare the F statistic first, then the p-value.
    f, pv = expected
    f2, pv2 = actual
    for reference, candidate in ((f, f2), (pv, pv2)):
        assert_true(np.allclose(reference, candidate))

test_feature_select.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def test_f_oneway_ints():
    """Smoke test: f_oneway gives the same result for int and float input."""
    # Smoke test f_oneway on integers: that it does not raise casting errors
    # with recent numpys
    rng = np.random.RandomState(0)
    X = rng.randint(10, size=(10, 10))
    y = np.arange(10)
    fint, pint = f_oneway(X, y)

    # test that it gives the same result as with float
    # (np.float was removed in NumPy 1.24; np.float64 is the same dtype)
    f, p = f_oneway(X.astype(np.float64), y)
    assert_array_almost_equal(f, fint, decimal=4)
    assert_array_almost_equal(p, pint, decimal=4)

analytics.py 文件源码项目：physalia 作者: TQRG 项目源码文件源码阅读 41 收藏 0 点赞 0 评论 0

def smart_hypothesis_testing(*samples, **options):
    """Do a smart hypothesis testing.

    Each positional argument is one sample; it is converted to a float
    array. Normality is checked with samples_are_normal, and when every
    sample passes and ``equal_var`` is true a one-way ANOVA is run.

    Keyword options (read from ``options``):
        fancy: write an explanatory report (default True).
        out: destination handed to _flush_output (default sys.stdout).
        alpha: significance level (default 0.05).
        equal_var: whether equal variances are assumed (default True).
        latex: flag handed to _flush_output (default True).

    Returns (statistic, pvalue, f_oneway) when the ANOVA branch runs.
    NOTE(review): no other branch is visible in this block, so any other
    case falls through and returns None -- confirm against the full source.
    """
    fancy = options.get('fancy', True)
    out = options.get('out', sys.stdout)
    alpha = options.get('alpha', 0.05)
    equal_var = options.get('equal_var', True)
    latex = options.get('latex', True)

    samples = [np.array(sample, dtype='float') for sample in samples]
    len_samples = len(samples)
    out_buffer = StringIO()

    # NOTE: the local names above are referenced by name through
    # ``**locals()`` in the format calls below; do not rename them.
    normality_results = samples_are_normal(*samples)
    if all(map(itemgetter(0), normality_results)):
        # all our samples are normal
        if equal_var:
            if fancy:
                # Two passes: Template.substitute fills the $-placeholders
                # from GREEK_ALPHABET, then str.format fills the {}-fields.
                out_buffer.write(Template(
                    u"Hypothesis testing:\n\n"
                    "\t$H0: ${mu}1 = ${mu}2{ellipsis} = $mu{len_samples}. "
                    "The means for all groups are equal.\n"
                    "\t$H1: $exists a,b $elementof Samples: ${mu}a $neq ${mu}b. "
                    "At least two of the means are not equal.\n\n"
                    "The significance test one-way analysis of variance (ANOVA) "
                    "was used with a significance level of $alpha={alpha:.2f}.\n"
                    "This test requires that the following "
                    "assumptions are satisfied:\n\n"
                    "1. Samples are independent.\n"
                    "2. Samples are drawn from a normally distributed population.\n"
                    "3. All populations have equal standard deviation.\n\n"
                    "For the assumption of normal distribution two tests were "
                    "performed ($alpha={alpha}): Shapiro Wilk's test "
                    "and D'Agostino and Pearson's test.\n"
                    "None of these tests reject the null hypothesis with "
                    "significance level of $alpha={alpha}, thus it is assumed that data "
                    "follows a normal distribution.\n\n"
                    "").substitute(GREEK_ALPHABET).format(
                        ellipsis=" = ..." if len_samples > 3 else "",
                        **locals()
                    ))
            statistic, pvalue = f_oneway(*samples)
            if fancy:
                if pvalue < alpha:
                    out_buffer.write(
                        u"One can say that samples come from populations "
                        "with different means, since ANOVA rejects the "
                        "null hypothesis "
                        "(statistic={statistic:.2f}, {pvalue_str}).\n"
                        "".format(pvalue_str=_pvalue_to_str(pvalue), **locals())
                    )
                else:
                    # ``rho`` is not a local, so formatting from locals()
                    # alone raised KeyError here. Presumably the symbol is
                    # meant to come from GREEK_ALPHABET; fall back to a
                    # plain "p" if that mapping has no such entry.
                    out_buffer.write(
                        u"Thus, it was not possible to find evidence that"
                        " the means of populations are different "
                        "(statistic={statistic:.2f},{rho}={pvalue:.2f}).\n"
                        "".format(rho=GREEK_ALPHABET.get('rho', 'p'), **locals())
                    )
            _flush_output(out, out_buffer, latex)
            return statistic, pvalue, f_oneway

eda.py 文件源码项目：xam 作者: MaxHalford 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def feature_importance_regression(features, target, n_neighbors=3, random_state=None):
    """Score how informative each feature is for a regression target.

    Floating-point columns are treated as continuous and scored with the
    Pearson correlation and mutual information. Integer and boolean columns
    are treated as discrete and scored with a one-way ANOVA F-test (target
    values grouped by feature value) and mutual information.

    features: DataFrame of candidate features.
    target: target values; indexed with the group labels produced by
        ``features.groupby`` -- presumably a Series sharing the index of
        ``features``; verify against callers.
    n_neighbors, random_state: forwarded to mutual_info_regression.

    Returns (cont_imp, disc_imp), two DataFrames indexed by the continuous
    and discrete column names respectively. A frame stays empty when there
    are no columns of that kind.
    """

    cont = features.select_dtypes(include=[np.floating])
    # np.bool was removed in NumPy 1.24; np.bool_ is the boolean scalar type.
    disc = features.select_dtypes(include=[np.integer, np.bool_])

    cont_imp = pd.DataFrame(index=cont.columns)
    disc_imp = pd.DataFrame(index=disc.columns)

    # Continuous features
    if cont_imp.index.size > 0:

        # Pearson correlation
        # DataFrame.iteritems was removed in pandas 2.0; items() is the
        # equivalent column iterator.
        pearson = np.array([stats.pearsonr(feature, target) for _, feature in cont.items()])
        cont_imp['pearson_r'] = pearson[:, 0]
        cont_imp['pearson_r_p_value'] = pearson[:, 1]

        # Mutual information
        mut_inf = feature_selection.mutual_info_regression(cont, target, discrete_features=False,
                                                           n_neighbors=n_neighbors,
                                                           random_state=random_state)
        cont_imp['mutual_information'] = mut_inf

    # Discrete features
    if disc_imp.index.size > 0:

        # F-test
        f_tests = defaultdict(dict)

        for feature in disc.columns:
            # one group of target values per distinct value of the feature
            groups = [target[idxs] for idxs in disc.groupby(feature).groups.values()]
            statistic, p_value = stats.f_oneway(*groups)
            f_tests[feature]['f_statistic'] = statistic
            f_tests[feature]['f_p_value'] = p_value

        f_tests_df = pd.DataFrame.from_dict(f_tests, orient='index')
        disc_imp['f_statistic'] = f_tests_df['f_statistic']
        disc_imp['f_p_value'] = f_tests_df['f_p_value']

        # Mutual information
        mut_inf = feature_selection.mutual_info_regression(disc, target, discrete_features=True,
                                                           n_neighbors=n_neighbors,
                                                           random_state=random_state)
        disc_imp['mutual_information'] = mut_inf

    return cont_imp, disc_imp