def enrich_signature(method="pca", percentile=99, results_dir="results", experiment="CROP-seq_Jurkat_TCR", n_genes=500):
    """
    Run Enrichr on the most extreme entries of a signature and plot the top terms.

    Reads a header-less CSV named
    ``<experiment>.differential_expression.<method>.stimutation.csv`` from
    ``results_dir`` (first column used as index, presumably gene names; the
    remaining column holding one numeric value per entry), keeps the index
    labels whose absolute value lies above the requested percentile of all
    absolute values, and submits them to ``enrichr``.

    Two files are written to ``results_dir``:

    * ``differential_expression.<method>.enrichr.csv`` - the table returned
      by ``enrichr``.
    * ``differential_expression.<method>.enrichr.top8_terms.svg`` - a
      horizontal bar plot of the 8 highest ``combined_score`` terms from each
      of a fixed list of gene set libraries.

    :param str method: Label used in the input and output file names.
    :param percentile: Percentile (0-100) of the absolute values; only
        entries strictly above it are kept.
    :param str results_dir: Directory holding the input and receiving outputs.
    :param str experiment: Prefix of the input file name (output file names
        do not include it).
    :param n_genes: Unused; kept so existing callers keep working.
    :returns: None
    """
    signature_file = os.path.join(
        results_dir,
        "{}.differential_expression.{}.stimutation.csv".format(experiment, method))
    # ``read_csv(squeeze=True)`` was removed in pandas 2.0; squeezing the
    # single-column frame afterwards yields the same Series.
    diff = pd.read_csv(
        signature_file, index_col=0, header=None, names=["gene_name"]
    ).squeeze("columns")

    # Keep only the entries whose magnitude exceeds the percentile cut-off.
    magnitude = diff.abs()
    cutoff = np.percentile(magnitude, percentile)
    degs = pd.Series(diff[magnitude > cutoff].index, name="gene_name")

    # ``reset_index`` hands enrichr a frame with an integer column plus "gene_name".
    enr = enrichr(degs.reset_index())
    enr.to_csv(
        os.path.join(results_dir, "differential_expression.{}.enrichr.csv".format(method)),
        index=False, encoding="utf8")

    # Plot top N terms of each library
    n = 8
    to_plot = [
        'GO_Biological_Process_2015',
        "KEGG_2016",
        "WikiPathways_2016",
        "Reactome_2016",
        "BioCarta_2016",
        "NCI-Nature_2016"]
    in_libraries = enr[enr['gene_set_library'].isin(to_plot)]
    # Grouped ``nlargest`` returns a (library, original row label) MultiIndex;
    # level 1 carries the labels of the selected rows in ``enr``.
    top_labels = (
        in_libraries.groupby("gene_set_library")['combined_score']
        .nlargest(n)
        .index.get_level_values(1))
    # ``.ix`` was removed in pandas 1.0; these are index labels, so use ``.loc``.
    p = enr.loc[top_labels].sort_values("combined_score", ascending=False)

    fig, axis = plt.subplots(1)
    # Draw on the axes created above explicitly instead of the implicit current axes.
    sns.barplot(data=p, y="description", x="combined_score", orient="horiz", hue="gene_set_library", ax=axis)
    axis.set_xlabel("Combined score")
    sns.despine(fig)
    fig.savefig(
        os.path.join(results_dir, "differential_expression.{}.enrichr.top{}_terms.svg".format(method, n)),
        bbox_inches="tight")
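# Comment list  (web-page navigation residue from the copied source, not code)
# Table of contents  (web-page navigation residue from the copied source, not code)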