def run_differential_expression(matrix, clusters, sseq_params=None):
    """Compute differential expression for each cluster vs all other cells.

    Args:
        matrix (GeneBCMatrix): gene expression data. ``matrix.m`` is passed
            to the sseq routines and ``matrix.genes_dim`` gives the number
            of rows in the result.
        clusters (np.array(int)): 1-based cluster labels.
        sseq_params (dict): params from compute_sseq_params. When None they
            are computed here from ``matrix.m``.

    Returns:
        DIFFERENTIAL_EXPRESSION wrapping an array of shape
        (matrix.genes_dim, 3 * n_clusters). For cluster k (1-based) the
        columns 3*(k-1), 3*(k-1)+1 and 3*(k-1)+2 hold 'norm_mean_a',
        'log2_fold_change' and 'adjusted_p_value' respectively.
    """
    # Guarantee element-wise comparison below even if a plain sequence is
    # passed; an ndarray goes through unchanged.
    clusters = np.asarray(clusters)
    n_clusters = np.max(clusters)

    if sseq_params is None:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Computing params...")
        sys.stdout.flush()
        sseq_params = compute_sseq_params(matrix.m)

    # Create a numpy array with 3*K columns;
    # each group of 3 columns is mean, log2, pvalue for cluster i
    all_de_results = np.zeros((matrix.genes_dim, 3 * n_clusters))

    for cluster in range(1, 1 + n_clusters):
        in_cluster = clusters == cluster
        group_a = np.flatnonzero(in_cluster)
        group_b = np.flatnonzero(np.logical_not(in_cluster))

        print('Computing DE for cluster %d...' % cluster)
        sys.stdout.flush()

        de_result = sseq_differential_expression(
            matrix.m, group_a, group_b, sseq_params)

        # First of the three columns reserved for this cluster.
        base = 3 * (cluster - 1)
        all_de_results[:, base] = de_result['norm_mean_a']
        all_de_results[:, base + 1] = de_result['log2_fold_change']
        all_de_results[:, base + 2] = de_result['adjusted_p_value']

    return DIFFERENTIAL_EXPRESSION(all_de_results)
评论列表
文章目录