python类fisher_exact()的实例源码-面圈网

tableone.py 文件源码项目：tableone 作者: tompollard 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def _p_test(self,v,grouped_data,is_continuous,is_categorical,
            is_normal,min_observed,catlevels,
            pval=np.nan,ptest='Not tested'):
        """
        Compute a p-value for variable ``v`` and report which test produced it.

        Parameters
        ----------
        v :
            Label of the variable; only used in warning messages.
        grouped_data :
            For continuous variables, a sequence of per-group samples that is
            unpacked into the scipy test. For categorical variables, a
            contingency table that exposes ``.shape`` (e.g. a numpy array).
        is_continuous, is_categorical : bool
            Select the family of tests. If neither is true the defaults are
            returned untouched.
        is_normal : bool
            For continuous data, choose one-way ANOVA (True) or
            Kruskal-Wallis (False).
        min_observed :
            When equal to 0 no test is run and a warning is emitted.
        catlevels :
            Not used by this method's body.
        pval, ptest :
            Values returned when no test is run.

        Returns
        -------
        tuple
            ``(pval, ptest)``: the p-value and the name of the test.
        """

        # do not test if any sub-group has no observations
        if min_observed == 0:
            warnings.warn('No p-value was computed for {} due to the low number of observations.'.format(v))
            return pval,ptest

        if is_continuous:
            if is_normal:
                # normally distributed
                ptest = 'One-way ANOVA'
                _, pval = stats.f_oneway(*grouped_data)
            else:
                # non-normally distributed
                ptest = 'Kruskal-Wallis'
                _, pval = stats.kruskal(*grouped_data)
        elif is_categorical:
            # default to chi-squared
            ptest = 'Chi-squared'
            _, pval, _, expected = stats.chi2_contingency(grouped_data)
            # if any expected cell counts are < 5, chi2 may not be valid
            # if this is a 2x2, switch to fisher exact
            if expected.min() < 5:
                if grouped_data.shape == (2,2):
                    # A double-quoted literal is required here: 'Fisher''s exact'
                    # is two adjacent literals and silently drops the apostrophe.
                    ptest = "Fisher's exact"
                    _, pval = stats.fisher_exact(grouped_data)
                else:
                    ptest = 'Chi-squared (warning: expected count < 5)'
                    # The chi-squared p-value is still returned, so the warning
                    # must say it is unreliable rather than claim none was computed.
                    warnings.warn('Chi-squared p-value for {} may be invalid: some expected cell counts are < 5.'.format(v))

        return pval,ptest

peakcaller.bak.py 文件源码项目：CLAM 作者: Xinglab 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def test_bin_fisher(intv_bin_ip, intv_bin_con, with_control=True, correction_method='fdr_bh'):
    """Score every bin of a single IP track against control with Fisher's exact test.

    Args
        intv_bin_ip: 2-D array of IP counts; must have exactly one row.
        intv_bin_con: 2-D array of control counts with the same number of
            columns; only its first row is read.
        with_control: if True the signal is the ratio
            ``this_ip / others_ip / this_con * others_con``; otherwise the
            raw IP count of the bin.
        correction_method: forwarded as ``method`` to ``multipletests``.
    Returns
        (binsignal, binscore_adj): the per-bin signal and element ``[1]`` of
        the ``multipletests`` result (presumably the corrected p-values).
        Bins whose IP count is 0 get signal NaN and an uncorrected p of 1.0.
    Raises
        Exception: if ``intv_bin_ip`` has more than one row.
    """
    if intv_bin_ip.shape[0] != 1:
        raise Exception('Fisher exact test does not deal with replicates.')
    n_bins = intv_bin_ip.shape[1]
    assert n_bins == intv_bin_con.shape[1]

    signal = np.empty(n_bins)
    pvals = np.empty(n_bins)
    total_ip = np.sum(intv_bin_ip[0,])
    total_con = np.sum(intv_bin_con[0,])

    for idx in range(n_bins):
        ip_here = intv_bin_ip[0, idx]
        con_here = intv_bin_con[0, idx]
        ip_rest = total_ip - ip_here
        con_rest = total_con - con_here

        if ip_here == 0:
            # nothing to test for an empty IP bin
            signal[idx] = np.nan
            pvals[idx] = 1.0
            continue

        table = [[ip_here, ip_rest], [con_here, con_rest]]
        pvals[idx] = fisher_exact(table, alternative='greater')[1]

        if with_control:
            # left-to-right evaluation made explicit
            signal[idx] = ((ip_here / ip_rest) / con_here) * con_rest
        else:
            signal[idx] = ip_here

    corrected = multipletests(pvals, alpha=0.05, method=correction_method)
    return signal, corrected[1]

peakcaller.bak2.py 文件源码项目：CLAM 作者: Xinglab 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def test_bin_fisher(intv_bin_ip, intv_bin_con, with_control=True, correction_method='fdr_bh'):
    """Run a one-sided Fisher's exact test per bin and correct the p-values.

    Args
        intv_bin_ip: 2-D IP count array with exactly one row.
        intv_bin_con: 2-D control count array with as many columns as
            ``intv_bin_ip``; only row 0 is used.
        with_control: choose between the IP/control ratio
            (``this_ip / others_ip / this_con * others_con``) and the raw
            IP count as the reported signal.
        correction_method: ``method`` argument passed to ``multipletests``.
    Returns
        Tuple ``(binsignal, binscore_adj)``; ``binscore_adj`` is element
        ``[1]`` of what ``multipletests`` returns. A bin with zero IP count
        is reported with NaN signal and enters the correction with p = 1.0.
    Raises
        Exception: when ``intv_bin_ip`` holds more than one row (replicates).
    """
    if intv_bin_ip.shape[0] != 1:
        raise Exception('Fisher exact test does not deal with replicates.')

    width = intv_bin_ip.shape[1]
    assert width == intv_bin_con.shape[1]

    scores = np.empty(width)
    signals = np.empty(width)
    ip_sum = np.sum(intv_bin_ip[0,])
    con_sum = np.sum(intv_bin_con[0,])

    for col in range(width):
        ip_in = intv_bin_ip[0, col]
        ip_out = ip_sum - ip_in
        con_in = intv_bin_con[0, col]
        con_out = con_sum - con_in

        if ip_in != 0:
            result = fisher_exact([[ip_in, ip_out], [con_in, con_out]], alternative='greater')
            scores[col] = result[1]
            if with_control:
                # same left-to-right arithmetic as a / b / c * d
                signals[col] = ((ip_in / ip_out) / con_in) * con_out
            else:
                signals[col] = ip_in
        else:
            # empty IP bin: no test is run
            signals[col] = np.nan
            scores[col] = 1.0

    adjusted = multipletests(scores, alpha=0.05, method=correction_method)
    return signals, adjusted[1]

association.py 文件源码项目：Price-Comparator 作者: Thejas-1 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def fisher_exact(*_args, **_kwargs):
        """Placeholder that rejects every call to Fisher's exact test.

        Any positional or keyword arguments are accepted and ignored.
        NOTE(review): this looks like the fallback used when scipy's
        ``fisher_exact`` cannot be imported -- confirm against the import
        logic around this definition.

        Raises
        ------
        NotImplementedError
            Always, with a message naming the missing dependency.
        """
        raise NotImplementedError(
            "fisher_exact is not available; Fisher's exact test requires scipy")

### Indices to marginals arguments:

association.py 文件源码项目：Price-Comparator 作者: Thejas-1 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def fisher(cls, *marginals):
        """Return the p-value of Fisher's Exact Test (Pedersen 1996) for a bigram.

        Less sensitive to small counts than PMI or Chi Sq, but also more
        expensive to compute. Requires scipy.

        The marginals are turned into the four contingency cells by
        ``cls._contingency`` and tested with ``alternative='less'``.
        """
        n_ii, n_io, n_oi, n_oo = cls._contingency(*marginals)
        table = [[n_ii, n_io], [n_oi, n_oo]]
        # element 0 is the odds ratio, element 1 the p-value
        return fisher_exact(table, alternative='less')[1]

data.py 文件源码项目：indefinite-pronouns 作者: dnrb 项目源码文件源码阅读 36 收藏 0 点赞 0 评论 0

def get_tf_associations(self, test):
        """Collect (slot, term, annotation) triples selected by a Fisher's exact test.

        For every ontological category in ``['body', 'thing']``, every slot
        index ``li`` in ``range(30)``, every term found in that slot and every
        annotation other than ``'UF'``, a 2x2 table is built from row counts:

            aa: rows with the annotation that contain the term
            ab: rows with the annotation that do not contain the term
            ba: rows without the annotation that contain the term
            bb: rows without the annotation that do not contain the term

        Parameters
        ----------
        test : str
            ``'not dissociated'`` keeps triples whose one-sided ('less')
            p-value is above .05, i.e. pairs for which we do not know for
            sure that they are not associated. ``'associated'`` keeps
            triples whose one-sided ('greater') p-value is below .05.
            Any other value selects nothing.

        Returns
        -------
        set of ``(li, term, annot)`` tuples.
        """
        tf_set = set()
        annotations = set(self.annotation)
        for onto in set(self.ontological):
            if onto not in ['body','thing']: continue
            onto_mask = self.ontological == onto
            d_onto = self.data[onto_mask]
            # The rows for each (category, annotation) pair do not depend on
            # the slot or the term, so select them once per category instead
            # of once per term.
            by_annot = [(annot, self.data[onto_mask * (self.annotation == annot)])
                        for annot in annotations if annot != 'UF']
            for li in range(30):
                terms = set(w for dd in d_onto for w in dd[li])
                for term in terms:
                    # rows of this category containing the term (annotation-independent)
                    n_with_term = len([t for t in d_onto if term in t[li]])
                    for annot, d_onto_annot in by_annot:
                        aa = len([t for t in d_onto_annot if term in t[li]]) # + term + function
                        ab = len(d_onto_annot) - aa # - term + function
                        ba = n_with_term - aa # + term - function
                        bb = len(d_onto) - (aa + ab + ba) # - term - function
                        table = [[aa,ab],[ba,bb]]
                        if test == 'not dissociated':
                            keep = fisher_exact(table, alternative='less')[1] > .05
                        elif test == 'associated':
                            keep = fisher_exact(table, alternative='greater')[1] < .05
                        else:
                            keep = False
                        if keep:
                            tf_set.add((li,term,annot))
        return tf_set

association.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 37 收藏 0 点赞 0 评论 0

def fisher_exact(*_args, **_kwargs):
        """Placeholder that rejects every call to Fisher's exact test.

        Any positional or keyword arguments are accepted and ignored.
        NOTE(review): this looks like the fallback used when scipy's
        ``fisher_exact`` cannot be imported -- confirm against the import
        logic around this definition.

        Raises
        ------
        NotImplementedError
            Always, with a message naming the missing dependency.
        """
        raise NotImplementedError(
            "fisher_exact is not available; Fisher's exact test requires scipy")

### Indices to marginals arguments:

association.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def fisher(cls, *marginals):
        """Return the p-value of Fisher's Exact Test (Pedersen 1996) for a bigram.

        Less sensitive to small counts than PMI or Chi Sq, but also more
        expensive to compute. Requires scipy.

        The marginals are turned into the four contingency cells by
        ``cls._contingency`` and tested with ``alternative='less'``.
        """
        n_ii, n_io, n_oi, n_oo = cls._contingency(*marginals)
        table = [[n_ii, n_io], [n_oi, n_oo]]
        # element 0 is the odds ratio, element 1 the p-value
        return fisher_exact(table, alternative='less')[1]

TermDocMatrix.py 文件源码项目：scattertext 作者: JasonKessler 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def _get_fisher_scores_from_counts(self, cat_word_counts, not_cat_word_counts):
        """Run a one-sided ('greater') Fisher's exact test for every word.

        For each position the 2x2 table is
        ``[[count in category, rest of category],
           [count outside category, rest outside category]]``,
        where "rest" is the array's total minus the word's own count.

        Returns a pair ``(odds_ratio, p_values)`` of arrays, one entry per word.
        """
        def _fisher_for_word(cells):
            in_cat, in_cat_rest, out_cat, out_cat_rest = cells
            table = [[in_cat, in_cat_rest], [out_cat, out_cat_rest]]
            return fisher_exact(table, alternative='greater')

        # one column per word, four rows holding the table cells
        stacked = np.array([
            cat_word_counts,
            cat_word_counts.sum() - cat_word_counts,
            not_cat_word_counts,
            not_cat_word_counts.sum() - not_cat_word_counts,
        ])
        odds_ratio, p_values = np.apply_along_axis(_fisher_for_word, 0, stacked)
        return odds_ratio, p_values

association.py 文件源码项目：neighborhood_mood_aws 作者: jarrellmark 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def fisher_exact(*_args, **_kwargs):
        """Placeholder that rejects every call to Fisher's exact test.

        Any positional or keyword arguments are accepted and ignored.
        NOTE(review): this looks like the fallback used when scipy's
        ``fisher_exact`` cannot be imported -- confirm against the import
        logic around this definition.

        Raises
        ------
        NotImplementedError
            Always, with a message naming the missing dependency.
        """
        raise NotImplementedError(
            "fisher_exact is not available; Fisher's exact test requires scipy")

### Indices to marginals arguments:

association.py 文件源码项目：neighborhood_mood_aws 作者: jarrellmark 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def fisher(cls, *marginals):
        """Return the p-value of Fisher's Exact Test (Pedersen 1996) for a bigram.

        Less sensitive to small counts than PMI or Chi Sq, but also more
        expensive to compute. Requires scipy.

        The marginals are turned into the four contingency cells by
        ``cls._contingency`` and tested with ``alternative='less'``.
        """
        n_ii, n_io, n_oi, n_oo = cls._contingency(*marginals)
        table = [[n_ii, n_io], [n_oi, n_oo]]
        # element 0 is the odds ratio, element 1 the p-value
        return fisher_exact(table, alternative='less')[1]

significance.py 文件源码项目：chat 作者: cambridgeltl 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def run_test(q1_pos, q2_pos, q1_neg,q2_neg):
    '''
    Run one significance test per position of four parallel arrays.

    The four arrays together describe a series of 2x2 contingency tables:
    table ``i`` is ``[[q1_pos[i], q2_pos[i]], [q1_neg[i], q2_neg[i]]]``.
    For each table either a Fisher exact test or a chi-squared test is run,
    depending on whether ``useFisherExact`` reports that the requirements
    for a reliable chi-squared test are not satisfied.

    A Bonferroni correction is then applied: each p-value is multiplied by
    the number of tests and capped at 1.0.

    Returns a list of two parallel lists: the first holds the adjusted
    p-values, the second the test values (the chi-squared statistic or the
    Fisher exact odds ratio).

    Raises ValueError when the four inputs do not all have the same length.
    '''

    columns = [q1_pos, q2_pos, q1_neg, q2_neg]
    n = len(columns[0])
    if not all(len(col) == n for col in columns):
        raise ValueError("length of input lists must be of same length")

    pvalues = []
    test_values = []

    for a, b, c, d in zip(*columns):
        obs = np.array([[a, b], [c, d]])
        # run the chosen test once and read both values from its result
        if useFisherExact(obs):
            result = fisher_exact(obs)
        else:
            result = chi2_contingency(obs)
        test_values.append(result[0])
        pvalues.append(result[1])

    # Bonferroni: scale each p-value up by the number of tests, never above 1.
    adjustedPValues = [min(1.0, float(p) * n) for p in pvalues]
    return [adjustedPValues, test_values]

association.py 文件源码项目：hate-to-hugs 作者: sdoran35 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def fisher_exact(*_args, **_kwargs):
        """Placeholder that rejects every call to Fisher's exact test.

        Any positional or keyword arguments are accepted and ignored.
        NOTE(review): this looks like the fallback used when scipy's
        ``fisher_exact`` cannot be imported -- confirm against the import
        logic around this definition.

        Raises
        ------
        NotImplementedError
            Always, with a message naming the missing dependency.
        """
        raise NotImplementedError(
            "fisher_exact is not available; Fisher's exact test requires scipy")

### Indices to marginals arguments:

association.py 文件源码项目：hate-to-hugs 作者: sdoran35 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def fisher(cls, *marginals):
        """Return the p-value of Fisher's Exact Test (Pedersen 1996) for a bigram.

        Less sensitive to small counts than PMI or Chi Sq, but also more
        expensive to compute. Requires scipy.

        The marginals are turned into the four contingency cells by
        ``cls._contingency`` and tested with ``alternative='less'``.
        """
        n_ii, n_io, n_oi, n_oo = cls._contingency(*marginals)
        table = [[n_ii, n_io], [n_oi, n_oo]]
        # element 0 is the odds ratio, element 1 the p-value
        return fisher_exact(table, alternative='less')[1]

association.py 文件源码项目：FancyWord 作者: EastonLee 项目源码文件源码阅读 40 收藏 0 点赞 0 评论 0

def fisher_exact(*_args, **_kwargs):
        """Placeholder that rejects every call to Fisher's exact test.

        Any positional or keyword arguments are accepted and ignored.
        NOTE(review): this looks like the fallback used when scipy's
        ``fisher_exact`` cannot be imported -- confirm against the import
        logic around this definition.

        Raises
        ------
        NotImplementedError
            Always, with a message naming the missing dependency.
        """
        raise NotImplementedError(
            "fisher_exact is not available; Fisher's exact test requires scipy")

### Indices to marginals arguments:

association.py 文件源码项目：FancyWord 作者: EastonLee 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def fisher(cls, *marginals):
        """Return the p-value of Fisher's Exact Test (Pedersen 1996) for a bigram.

        Less sensitive to small counts than PMI or Chi Sq, but also more
        expensive to compute. Requires scipy.

        The marginals are turned into the four contingency cells by
        ``cls._contingency`` and tested with ``alternative='less'``.
        """
        n_ii, n_io, n_oi, n_oo = cls._contingency(*marginals)
        table = [[n_ii, n_io], [n_oi, n_oo]]
        # element 0 is the odds ratio, element 1 the p-value
        return fisher_exact(table, alternative='less')[1]

association.py 文件源码项目：beepboop 作者: nicolehe 项目源码文件源码阅读 26 收藏 0 点赞 0 评论 0

def fisher_exact(*_args, **_kwargs):
        """Placeholder that rejects every call to Fisher's exact test.

        Any positional or keyword arguments are accepted and ignored.
        NOTE(review): this looks like the fallback used when scipy's
        ``fisher_exact`` cannot be imported -- confirm against the import
        logic around this definition.

        Raises
        ------
        NotImplementedError
            Always, with a message naming the missing dependency.
        """
        raise NotImplementedError(
            "fisher_exact is not available; Fisher's exact test requires scipy")

### Indices to marginals arguments:

association.py 文件源码项目：beepboop 作者: nicolehe 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def fisher(cls, *marginals):
        """Return the p-value of Fisher's Exact Test (Pedersen 1996) for a bigram.

        Less sensitive to small counts than PMI or Chi Sq, but also more
        expensive to compute. Requires scipy.

        The marginals are turned into the four contingency cells by
        ``cls._contingency`` and tested with ``alternative='less'``.
        """
        n_ii, n_io, n_oi, n_oo = cls._contingency(*marginals)
        table = [[n_ii, n_io], [n_oi, n_oo]]
        # element 0 is the odds ratio, element 1 the p-value
        return fisher_exact(table, alternative='less')[1]

association.py 文件源码项目：kind2anki 作者: prz3m 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def fisher_exact(*_args, **_kwargs):
        """Placeholder that rejects every call to Fisher's exact test.

        Any positional or keyword arguments are accepted and ignored.
        NOTE(review): this looks like the fallback used when scipy's
        ``fisher_exact`` cannot be imported -- confirm against the import
        logic around this definition.

        Raises
        ------
        NotImplementedError
            Always, with a message naming the missing dependency.
        """
        raise NotImplementedError(
            "fisher_exact is not available; Fisher's exact test requires scipy")

### Indices to marginals arguments:

association.py 文件源码项目：kind2anki 作者: prz3m 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def fisher(cls, *marginals):
        """Return the p-value of Fisher's Exact Test (Pedersen 1996) for a bigram.

        Less sensitive to small counts than PMI or Chi Sq, but also more
        expensive to compute. Requires scipy.

        The marginals are turned into the four contingency cells by
        ``cls._contingency`` and tested with ``alternative='less'``.
        """
        n_ii, n_io, n_oi, n_oo = cls._contingency(*marginals)
        table = [[n_ii, n_io], [n_oi, n_oo]]
        # element 0 is the odds ratio, element 1 the p-value
        return fisher_exact(table, alternative='less')[1]

association.py 文件源码项目：but_sentiment 作者: MixedEmotions 项目源码文件源码阅读 42 收藏 0 点赞 0 评论 0

def fisher_exact(*_args, **_kwargs):
        """Placeholder that rejects every call to Fisher's exact test.

        Any positional or keyword arguments are accepted and ignored.
        NOTE(review): this looks like the fallback used when scipy's
        ``fisher_exact`` cannot be imported -- confirm against the import
        logic around this definition.

        Raises
        ------
        NotImplementedError
            Always, with a message naming the missing dependency.
        """
        raise NotImplementedError(
            "fisher_exact is not available; Fisher's exact test requires scipy")

### Indices to marginals arguments:

association.py 文件源码项目：but_sentiment 作者: MixedEmotions 项目源码文件源码阅读 39 收藏 0 点赞 0 评论 0

def fisher(cls, *marginals):
        """Return the p-value of Fisher's Exact Test (Pedersen 1996) for a bigram.

        Less sensitive to small counts than PMI or Chi Sq, but also more
        expensive to compute. Requires scipy.

        The marginals are turned into the four contingency cells by
        ``cls._contingency`` and tested with ``alternative='less'``.
        """
        n_ii, n_io, n_oi, n_oo = cls._contingency(*marginals)
        table = [[n_ii, n_io], [n_oi, n_oo]]
        # element 0 is the odds ratio, element 1 the p-value
        return fisher_exact(table, alternative='less')[1]

concordance_analysis.py 文件源码项目：microbiomeHD 作者: cduvallet 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def concordance(series1, series2, method, nreps=1000):
    """
    Compute a concordance measure and p-value between two pandas Series.

    Parameters
    ----------
    series1, series2 : pandas Series
        Series with matching indexes.
    method : str
        One of 'fisher', 'spearman', 'kendalltau', 'empirical', 'cohen'.
    nreps : int
        Number of repetitions used to build the null. Only forwarded when
        method is 'empirical'.

    Returns
    -------
    measure : float
        Measure of concordance: the odds ratio for 'fisher', the correlation
        for 'spearman' and 'kendalltau', Cohen's kappa for 'cohen', and
        whatever ``empirical_pval`` returns first for 'empirical'.
    p : float
        p value of the observed concordance; always NaN for 'cohen'.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    if method == 'fisher':
        # Original note: the cross-tabulation automatically ignores any bugs
        # which were not present in both series.
        contingency = pd.crosstab(series1, series2)
        return fisher_exact(contingency)

    if method == 'spearman':
        return spearmanr(series1, series2)

    if method == 'kendalltau':
        return kendalltau(series1, series2, nan_policy='omit')

    if method == 'empirical':
        return empirical_pval(series1, series2, nreps)

    if method == 'cohen':
        # keep only the positions where both series have a value
        paired = pd.concat((series1, series2), axis=1).dropna()
        kappa = cohen_kappa_score(paired.iloc[:, 0], paired.iloc[:, 1])
        return kappa, np.nan

    raise ValueError('Unknown concordance method.')

feature_pathway_overrepresentation.py 文件源码项目：PathCORE-T 作者: greenelab 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def single_side_pathway_enrichment(pathway_definitions,
                                   gene_signature,
                                   n_genes):
    """Identify overrepresented pathways using the Fisher's exact test for
    significance on a given pathway definition and gene signature.
    (FDR correction for multiple testing is applied in
    `_significant_pathways_dataframe`).

    Parameters
    -----------
    pathway_definitions : dict(str -> set(str))
      Pathway definitions, *post*-overlap-correction if this function
      is called from `pathway_enrichment_with_overlap_correction`.
      A pathway (key) is defined by a set of genes (value). A value may
      also be a tuple of sets, in which case their union is used.
    gene_signature : set(str)
      The set of genes we consider to be enriched in a feature.
    n_genes : int
      The total number of genes for which we have assigned weights in the
      features of an unsupervised model.

    Returns
    -----------
    pandas.Series, for each pathway, the p-value from applying the
      one-sided ("greater") Fisher's exact test. The series is named
      "p-value", indexed by pathway, and always has a float dtype; it is
      empty when `gene_signature` is empty.
    """
    if not gene_signature:
        # An explicit dtype keeps the empty result numeric and avoids the
        # pandas-version-dependent default for dtype-less empty Series.
        return pd.Series(name="p-value", dtype=float)

    pathways = []
    pvalues_list = []
    for pathway, definition in pathway_definitions.items():
        if isinstance(definition, tuple):
            definition = set.union(*definition)

        both_definition_and_signature = len(definition & gene_signature)
        in_definition_not_signature = (len(definition) -
                                       both_definition_and_signature)
        in_signature_not_definition = (len(gene_signature) -
                                       both_definition_and_signature)
        neither_definition_nor_signature = (n_genes -
                                            both_definition_and_signature -
                                            in_definition_not_signature -
                                            in_signature_not_definition)
        contingency_table = np.array(
            [[both_definition_and_signature, in_signature_not_definition],
             [in_definition_not_signature, neither_definition_nor_signature]])
        try:
            _, pvalue = stats.fisher_exact(
                contingency_table, alternative="greater")
        # FPE can occur when `neither_definition_nor_signature` is very
        # large and `both_definition_and_signature` is very small (near zero)
        except FloatingPointError:
            pvalue = 1.0
        # Record the label together with its value so the index can never
        # drift out of step with the p-values.
        pathways.append(pathway)
        pvalues_list.append(pvalue)

    return pd.Series(
        pvalues_list, index=pathways, name="p-value", dtype=float)

plot.py 文件源码项目：cohorts 作者: hammerlab 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def fishers_exact_plot(data, condition1, condition2, ax=None,
                       condition1_value=None,
                       alternative="two-sided", **kwargs):
    """
    Perform a Fisher's exact test to compare two binary columns

    Parameters
    ----------
    data: Pandas dataframe
        Dataframe to retrieve information from

    condition1: str
        First binary column to compare (and used for test sidedness)

    condition2: str
        Second binary column to compare

    ax : Axes, default None
        Axes to plot on

    condition1_value:
        If `condition1` is not a binary column, split on =/!= to condition1_value

    alternative:
        Specify the sidedness of the test: "two-sided", "less"
        or "greater". Only "two-sided" is currently accepted.

    Returns
    -------
    FishersExactResults
        Holds the odds ratio, p-value, sidedness string, the `condition2`
        values for rows inside and outside the `condition1` mask, and the
        bar plot.

    Raises
    ------
    ValueError
        If `alternative` is anything other than "two-sided".
    """
    # Reject unsupported sidedness before drawing or printing anything, so a
    # bad argument cannot leave a half-decorated Axes behind.
    if alternative != "two-sided":
        raise ValueError("We need to better understand the one-sided Fisher's Exact test")
    sided_str = "two-sided"

    plot = sb.barplot(
        x=condition1,
        y=condition2,
        ax=ax,
        data=data,
        **kwargs
    )

    plot.set_ylabel("Percent %s" % condition2)
    condition1_mask = get_condition_mask(data, condition1, condition1_value)
    # NOTE(review): the table is built from the raw `condition1` column, not
    # from `condition1_mask` -- confirm this is intended for non-binary columns.
    count_table = pd.crosstab(data[condition1], data[condition2])
    print(count_table)
    oddsratio, p_value = fisher_exact(count_table, alternative=alternative)
    add_significance_indicator(plot=plot, significant=p_value <= 0.05)
    only_percentage_ticks(plot)

    print("Fisher's Exact Test: OR: {}, p-value={} ({})".format(oddsratio, p_value, sided_str))
    return FishersExactResults(oddsratio=oddsratio,
                               p_value=p_value,
                               sided_str=sided_str,
                               with_condition1_series=data[condition1_mask][condition2],
                               without_condition1_series=data[~condition1_mask][condition2],
                               plot=plot)