multigenome.py 文件源码-python代码片段

def _compute_count_purity(counts0, counts1):
        """ Compute fraction of counts in putative single-cell GEMs
        originating from the non-cell transcriptome """
        gem_occupancy = MultiGenomeAnalysis._classify_gems(counts0, counts1)
        frac0 = counts0.astype(float) / (counts0 + counts1).astype(float)
        purity0 = frac0[gem_occupancy == cr_constants.GEM_CLASS_GENOME0]
        purity1 = 1 - frac0[gem_occupancy == cr_constants.GEM_CLASS_GENOME1]
        overall_purity = np.concatenate([purity0, purity1])

        # Compute number of purity outliers
        threshold0, threshold1 = 1.0, 1.0
        fit_purity0 = purity0[np.logical_and(purity0 > 0, purity0 < 1)]
        fit_purity1 = purity1[np.logical_and(purity1 > 0, purity1 < 1)]
        if len(fit_purity0) > 1 and len(fit_purity1) > 1:
            try:
                alpha0, beta0, _, _ = scipy.stats.beta.fit(fit_purity0, floc=0, fscale=1)
                alpha1, beta1, _, _ = scipy.stats.beta.fit(fit_purity1, floc=0, fscale=1)
                threshold0 = scipy.stats.beta.ppf(cr_constants.COUNT_PURITY_OUTLIER_PROB_THRESHOLD, alpha0, beta0)
                threshold1 = scipy.stats.beta.ppf(cr_constants.COUNT_PURITY_OUTLIER_PROB_THRESHOLD, alpha1, beta1)
            except scipy.stats._continuous_distns.FitSolverError as e:
                print >> sys.stderr, e
                threshold0, threshold1 = 1.0, 1.0
            except scipy.stats._continuous_distns.FitDataError as e:
                print >> sys.stderr, e
                threshold0, threshold1 = 1.0, 1.0

        outlier0 = np.logical_and(gem_occupancy == cr_constants.GEM_CLASS_GENOME0,
                                  frac0 < threshold0)
        outlier1 = np.logical_and(gem_occupancy == cr_constants.GEM_CLASS_GENOME1,
                                  (1-frac0) < threshold1)
        n_outlier0 = sum(outlier0)
        n_outlier1 = sum(outlier1)
        frac_outlier0 = tk_stats.robust_divide(n_outlier0, len(purity0))
        frac_outlier1 = tk_stats.robust_divide(n_outlier1, len(purity1))
        is_outlier = np.logical_or(outlier0, outlier1).astype(int)

        return (purity0.mean(), purity1.mean(), overall_purity.mean(),
                n_outlier0, n_outlier1, frac_outlier0, frac_outlier1,
                is_outlier)