# significance.py -- Python source code snippet

def run_test(q1_pos, q2_pos, q1_neg,q2_neg):
    """Run one significance test per 2x2 contingency table and correct the p-values.

    The four arguments are parallel sequences; position ``i`` across them
    describes one 2x2 table::

        [[q1_pos[i], q2_pos[i]],
         [q1_neg[i], q2_neg[i]]]

    For every table either Fisher's exact test or a chi-squared test is run.
    The choice is delegated to ``useFisherExact(obs)``, which is defined
    outside this function (presumably it checks whether the requirements for
    a reliable chi-squared test are met -- confirm against that helper).

    The raw p-values are then Bonferroni-adjusted: each one is multiplied by
    the number of tests and capped at 1.0.

    Args:
        q1_pos, q2_pos: first row of each table (one entry per test).
        q1_neg, q2_neg: second row of each table (one entry per test).

    Returns:
        A two-element list ``[adjusted_p_values, test_values]`` of parallel
        lists. ``test_values[i]`` is the chi-squared statistic or the
        Fisher-exact odds ratio, depending on which test was run for table i.

    Raises:
        ValueError: if the four input sequences do not all have the same
            length.
    """
    columns = (q1_pos, q2_pos, q1_neg, q2_neg)
    n = len(q1_pos)
    if any(len(column) != n for column in columns):
        raise ValueError("length of input lists must be of same length")

    pvalues = []
    test_values = []

    for pos_1, pos_2, neg_1, neg_2 in zip(*columns):
        obs = np.array([[pos_1, pos_2], [neg_1, neg_2]])
        # Run the selected test once and read both fields from that result.
        if useFisherExact(obs):
            result = fisher_exact(obs)
        else:
            result = chi2_contingency(obs)
        # Both scipy results start with (statistic, p-value).
        test_values.append(result[0])
        pvalues.append(result[1])

    # Bonferroni correction: scale each p-value UP by the number of tests and
    # clamp to 1.0. (Dividing by n is what one does to the alpha threshold,
    # not to the p-values.) The argument order of min() keeps a NaN p-value
    # as NaN instead of silently turning it into 1.0.
    adjusted_p_values = [min(float(p) * n, 1.0) for p in pvalues]
    return [adjusted_p_values, test_values]