def diff_exp(matrix, group1, group2, index):
    """Compute a per-column two-sample Z-test between two groups of rows.

    For every column of ``matrix`` the mean of the ``group1`` rows is
    compared with the mean of the ``group2`` rows. The difference is divided
    by its standard error, computed from the population (biased) variances
    of the two groups, and converted into a two-sided p-value using the
    standard normal distribution.

    Args:
        matrix: 2-D matrix indexed as ``matrix[rows, :]`` whose row subsets
            provide ``.mean(0)`` and ``.power(2)`` (presumably a
            ``scipy.sparse`` matrix -- confirm against callers).
        group1: Row selector for the first group.
        group2: Row selector for the second group.
        index: Row labels for the returned frame, one per column of
            ``matrix``.

    Returns:
        A ``pandas.DataFrame`` indexed by ``index`` with columns ``'z'``
        (Z-score, 0 where the standard error is not positive) and ``'p'``
        (two-sided p-value).
    """
    g1 = matrix[group1, :]
    g2 = matrix[group2, :]
    # Take the group sizes from the selected rows rather than len(selector):
    # identical for index lists, and also right for boolean masks / slices.
    n1 = g1.shape[0]
    n2 = g2.shape[0]

    g1mu = np.asarray(g1.mean(0)).ravel()
    g2mu = np.asarray(g2.mean(0)).ravel()
    mean_diff = g1mu - g2mu

    # Var[X] = E[X^2] - (E[X])^2, computed without densifying the matrix.
    g1var = np.asarray(g1.power(2).mean(0)).ravel() - g1mu ** 2
    g2var = np.asarray(g2.power(2).mean(0)).ravel() - g2mu ** 2
    diff_var = g1var / n1 + g2var / n2
    # Rounding can leave a (near-)constant column with a tiny negative
    # variance; clamp it so sqrt gives 0 instead of NaN plus a warning.
    pooled_sd = np.sqrt(np.maximum(diff_var, 0))

    # Columns with no measurable spread keep a Z-score of 0.
    z_scores = np.zeros_like(pooled_sd)
    nz = pooled_sd > 0
    z_scores[nz] = np.nan_to_num(mean_diff[nz] / pooled_sd[nz])

    # Two-sided Z-test. The survival function keeps precision in the far
    # tail, where 1 - cdf(z) would round to exactly 0.
    p_vals = 2 * stats.norm.sf(np.abs(z_scores))
    df = pd.DataFrame(OrderedDict([('z', z_scores), ('p', p_vals)]),
                      index=index)
    return df
评论列表
文章目录