def _compute_count_purity(counts0, counts1):
""" Compute fraction of counts in putative single-cell GEMs
originating from the non-cell transcriptome """
gem_occupancy = MultiGenomeAnalysis._classify_gems(counts0, counts1)
frac0 = counts0.astype(float) / (counts0 + counts1).astype(float)
purity0 = frac0[gem_occupancy == cr_constants.GEM_CLASS_GENOME0]
purity1 = 1 - frac0[gem_occupancy == cr_constants.GEM_CLASS_GENOME1]
overall_purity = np.concatenate([purity0, purity1])
# Compute number of purity outliers
threshold0, threshold1 = 1.0, 1.0
fit_purity0 = purity0[np.logical_and(purity0 > 0, purity0 < 1)]
fit_purity1 = purity1[np.logical_and(purity1 > 0, purity1 < 1)]
if len(fit_purity0) > 1 and len(fit_purity1) > 1:
try:
alpha0, beta0, _, _ = scipy.stats.beta.fit(fit_purity0, floc=0, fscale=1)
alpha1, beta1, _, _ = scipy.stats.beta.fit(fit_purity1, floc=0, fscale=1)
threshold0 = scipy.stats.beta.ppf(cr_constants.COUNT_PURITY_OUTLIER_PROB_THRESHOLD, alpha0, beta0)
threshold1 = scipy.stats.beta.ppf(cr_constants.COUNT_PURITY_OUTLIER_PROB_THRESHOLD, alpha1, beta1)
except scipy.stats._continuous_distns.FitSolverError as e:
print >> sys.stderr, e
threshold0, threshold1 = 1.0, 1.0
except scipy.stats._continuous_distns.FitDataError as e:
print >> sys.stderr, e
threshold0, threshold1 = 1.0, 1.0
outlier0 = np.logical_and(gem_occupancy == cr_constants.GEM_CLASS_GENOME0,
frac0 < threshold0)
outlier1 = np.logical_and(gem_occupancy == cr_constants.GEM_CLASS_GENOME1,
(1-frac0) < threshold1)
n_outlier0 = sum(outlier0)
n_outlier1 = sum(outlier1)
frac_outlier0 = tk_stats.robust_divide(n_outlier0, len(purity0))
frac_outlier1 = tk_stats.robust_divide(n_outlier1, len(purity1))
is_outlier = np.logical_or(outlier0, outlier1).astype(int)
return (purity0.mean(), purity1.mean(), overall_purity.mean(),
n_outlier0, n_outlier1, frac_outlier0, frac_outlier1,
is_outlier)
评论列表
文章目录