analytics.py 文件源码

python
阅读 34 收藏 0 点赞 0 评论 0

项目:physalia 作者: TQRG 项目源码 文件源码
def smart_hypothesis_testing(*samples, **options):
    """Do a smart hypothesis testing."""
    fancy = options.get('fancy', True)
    out = options.get('out', sys.stdout)
    alpha = options.get('alpha', 0.05)
    equal_var = options.get('equal_var', True)
    latex = options.get('latex', True)

    samples = [np.array(sample, dtype='float') for sample in samples]
    len_samples = len(samples)
    out_buffer = StringIO()

    normality_results = samples_are_normal(*samples)
    if all(map(itemgetter(0), normality_results)):
        # all our samples are normal
        if equal_var:
            if fancy:
                out_buffer.write(Template(
                    u"Hypothesis testing:\n\n"
                    "\t$H0: ${mu}1 = ${mu}2{ellipsis} = $mu{len_samples}. "
                    "The means for all groups are equal.\n"
                    "\t$H1: $exists a,b $elementof Samples: ${mu}a $neq ${mu}b. "
                    "At least two of the means are not equal.\n\n"
                    "The significance test one-way analysis of variance (ANOVA) "
                    "was used with a significance level of $alpha={alpha:.2f}.\n"
                    "This test requires that the following "
                    "assumptions are satisfied:\n\n"
                    "1. Samples are independent.\n"
                    "2. Samples are drawn from a normally distributed population.\n"
                    "3. All populations have equal standard deviation.\n\n"
                    "For the assumption of normal distribution two tests were "
                    "performed ($alpha={alpha}): Shapiro Wilk's test "
                    "and D'Agostino and Pearson's test.\n"
                    "None of these tests reject the null hypothesis with "
                    "significance level of $alpha={alpha}, thus it is assumed that data "
                    "follows a normal distribution.\n\n"
                    "").substitute(GREEK_ALPHABET).format(
                        ellipsis=" = ..." if len_samples > 3 else "",
                        **locals()
                    ))
            statistic, pvalue = f_oneway(*samples)
            if fancy:
                if pvalue < alpha:
                    out_buffer.write(
                        u"One can say that samples come from populations "
                        "with different means, since ANOVA rejects the "
                        "null hypothesis "
                        "(statistic={statistic:.2f}, {pvalue_str}).\n"
                        "".format(pvalue_str=_pvalue_to_str(pvalue), **locals())
                    )
                else:
                    out_buffer.write(
                        u"Thus, it was not possible to find evidence that"
                        " the means of populations are different "
                        "(statistic={statistic:.2f},{rho}={pvalue:.2f}).\n"
                        "".format(**locals())
                    )
            _flush_output(out, out_buffer, latex)
            return statistic, pvalue, f_oneway
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号