def _guide_color(name, palette, fallback=(0.5, 0.5, 0.5)):
    """Return the palette colour of the first category tag contained in `name`.

    Tags are checked in the order Wnt, CTRL, Tcr, Ess and mapped to
    ``palette[0..3]``; a guide name matching none of them gets `fallback`.
    """
    for tag, color in zip(("Wnt", "CTRL", "Tcr", "Ess"), palette):
        if tag in name:
            return color
    return fallback


def gRNA_rank(s1, s2, prefix="", text=False, n_labels=30):
    """Plot per-screen gRNA log2 fold-changes against their rank and save tables.

    `s1` and `s2` are joined on their index (gRNA names); every column of `s2`
    is treated as one screen, processed in reverse column order. For each
    screen the fold-change is ``log2(1 + screen) - log2(1 + baseline)``, where
    the baseline column is ``"gDNA_Jurkat"`` when `prefix` starts with
    ``"mid_screen-"`` and ``"plasmid_pool_TCR"`` otherwise. Missing counts are
    treated as 0 and guides whose name contains ``"Wnt"`` are excluded.

    Outputs written to the module-level ``results_dir``:
      * one scatter panel per screen (SVG and PDF),
      * a CSV with the per-gRNA fold-changes of all screens,
      * two CSVs with the gene-level mean and minimum of those fold-changes,
        using the ``oligo_name`` -> ``gene`` mapping of the module-level
        ``guide_annotation`` table.

    :param s1: DataFrame indexed by gRNA name.
    :param s2: DataFrame indexed by gRNA name; its columns are the screens.
    :param prefix: Label embedded in the output file names; also selects the
        baseline column (see above).
    :param text: If True, annotate guides whose name matches
        ``ETS1|GATA3|RUNX1`` and add ``"text."`` to the figure file names.
    :param n_labels: Currently unused; kept for interface compatibility.
    :raises ValueError: If `s2` has no columns.
    :raises KeyError: If the baseline column is absent from the joined table.
    """
    screens = list(s2.columns[::-1])
    if not screens:
        raise ValueError("s2 has no columns; there is no screen to rank")

    # At least the historical 3x2 grid; add rows rather than fail when there
    # are more than six screens.
    n_rows = max(3, -(-len(screens) // 2))
    fig, axis = plt.subplots(
        n_rows, 2, sharex=True, sharey=True, figsize=(8, 8.0 * n_rows / 3))
    axis = axis.flatten()

    # Loop-invariant pieces.
    combined = s1.join(s2)
    if prefix.startswith("mid_screen-"):
        baseline_col = "gDNA_Jurkat"
    else:
        baseline_col = "plasmid_pool_TCR"
    palette = sns.color_palette("colorblind")

    xx = None
    for i, screen in enumerate(screens):
        # Rows are reshuffled for every screen; presumably so that neither the
        # drawing order nor the tie-breaking of rank(method="first") depends
        # on the input order.
        x = combined.iloc[np.random.permutation(len(combined))]
        x = x.loc[~x.index.str.contains("Wnt")]
        x = x.fillna(0)
        b = x[baseline_col]

        fc = np.log2(1 + x[screen]) - np.log2(1 + b)
        fc.name = screen

        # Accumulate the fold-changes of all screens, aligned on gRNA name.
        if xx is None:
            xx = pd.DataFrame(fc)
        else:
            xx = xx.join(fc, how="outer")

        colors = [_guide_color(name, palette) for name in x.index]
        ranks = fc.rank(ascending=False, method="first")
        axis[i].scatter(ranks, fc, color=colors, alpha=0.5)

        if text:
            for j in x.index[x.index.str.contains("ETS1|GATA3|RUNX1")]:
                axis[i].text(ranks.loc[j], fc.loc[j], j)

        axis[i].axhline(y=0, color='black', linestyle='--', lw=0.5)
        axis[i].set_title(screen)

    # Y labels on the left column only, X labels on the bottom row only.
    for i in range(0, len(axis), 2):
        axis[i].set_ylabel("gRNA fold-change")
    for ax in axis[-2:]:
        ax.set_xlabel("gRNA rank")
    sns.despine(fig)
    fig.savefig(os.path.join(results_dir, "gRNA_counts.norm.{}.rank.{}svg".format(prefix, "text." if text else "")), bbox_inches="tight")
    fig.savefig(os.path.join(results_dir, "gRNA_counts.norm.{}.rank.{}pdf".format(prefix, "text." if text else "")), bbox_inches="tight")

    # Save per-gRNA fold-changes (the values that were ranked in the plots)
    xx.to_csv(os.path.join(results_dir, "gRNA_counts.norm.{}.rank.csv".format(prefix)), index=True)

    # Save gene-level measurements, reduced by mean and min.
    # NOTE(review): the merge relies on the index of the joined table being
    # named "gRNA_name" - confirm against the callers.
    m = pd.merge(xx.reset_index(), guide_annotation[["oligo_name", "gene"]], left_on="gRNA_name", right_on="oligo_name").drop("oligo_name", axis=1).set_index(["gene", "gRNA_name"])
    m.groupby(level=[0]).mean().to_csv(os.path.join(results_dir, "gRNA_counts.norm.{}.gene_mean.rank.csv".format(prefix)), index=True)
    m.groupby(level=[0]).min().to_csv(os.path.join(results_dir, "gRNA_counts.norm.{}.gene_min.rank.csv".format(prefix)), index=True)
评论列表
文章目录