# analysis.py source file - Python code snippet

def enrich_signature(method="pca", percentile=99, results_dir="results", experiment="CROP-seq_Jurkat_TCR", n_genes=500, n_top_terms=8):
    """
    Run Enrichr on the most extreme genes of a differential expression
    signature and plot the top-scoring terms per gene set library.

    The signature is read from
    ``<results_dir>/<experiment>.differential_expression.<method>.stimutation.csv``
    (header-less CSV, first column used as index). Genes whose absolute value
    exceeds the given percentile of all absolute values are passed to
    ``enrichr``; the returned table is saved as CSV and the top terms of a
    fixed set of libraries are drawn as a horizontal bar plot saved as SVG.

    :param str method: Name of the method that produced the signature; used in
        the input and output file names.
    :param percentile: Percentile (0-100) of the absolute signature values
        above which a gene is selected.
    :param str results_dir: Directory the signature is read from and the
        outputs are written to.
    :param str experiment: Experiment name; used as prefix of the input file
        name only (output file names do not include it).
    :param int n_genes: Not used by this function; kept so existing callers
        keep working.
    :param int n_top_terms: Number of top terms (by ``combined_score``) to plot
        for each gene set library.
    :returns: None. Results are written to ``results_dir``.
    """
    diff_file = os.path.join(
        results_dir,
        "{}.differential_expression.{}.stimutation.csv".format(experiment, method))
    # `read_csv(squeeze=True)` no longer exists in pandas >= 2.0;
    # squeezing the columns of the parsed frame is the documented replacement.
    diff = pd.read_csv(diff_file, index_col=0, header=None, names=["gene_name"]).squeeze("columns")

    # Select genes in the extreme tail of the absolute signature values
    abs_diff = abs(diff)
    threshold = np.percentile(abs_diff, percentile)
    degs = pd.Series(diff[abs_diff > threshold].index)
    degs.name = "gene_name"

    enr = enrichr(degs.reset_index())
    enr.to_csv(os.path.join(results_dir, "differential_expression.{}.enrichr.csv".format(method)), index=False, encoding="utf8")

    # Plot top N terms of each library
    to_plot = [
        'GO_Biological_Process_2015',
        "KEGG_2016",
        "WikiPathways_2016",
        "Reactome_2016",
        "BioCarta_2016",
        "NCI-Nature_2016"]

    selected = enr[enr['gene_set_library'].isin(to_plot)]
    # groupby(...).nlargest() yields a (library, original row label) index;
    # level 1 holds the row labels needed to pull the full rows back out.
    top_labels = (
        selected
        .groupby("gene_set_library")['combined_score']
        .nlargest(n_top_terms)
        .index.get_level_values(1))
    # `.loc` replaces the removed `.ix` indexer (label-based selection)
    p = enr.loc[top_labels].sort_values("combined_score", ascending=False)

    fig, axis = plt.subplots(1)
    # Draw on the axes created above rather than relying on the implicit current axes
    sns.barplot(data=p, y="description", x="combined_score", orient="horiz", hue="gene_set_library", ax=axis)
    axis.set_xlabel("Combined score")
    sns.despine(fig)
    fig.savefig(os.path.join(results_dir, "differential_expression.{}.enrichr.top{}_terms.svg".format(method, n_top_terms)), bbox_inches="tight")