def plot_read_length_hist():
    """Plot a length-weighted histogram of read lengths and save it as a PNG.

    Reads the module-level ``ALL_READS["seq_length"]`` values, optionally
    drops the longest reads when ``CLIP`` is truthy, and draws a 50-bin
    histogram in which every read is weighted by its own length.  The figure
    is saved under ``PLOTS_DIR`` as
    ``<SAMPLE_NAME>_hist_read_length_by_basepair.png`` (spaces in the sample
    name are replaced by underscores).

    All previously open matplotlib figures are closed first.
    """
    # Close any previous plots
    plt.close('all')
    num_bins = 50
    seq_lengths = ALL_READS["seq_length"]
    if CLIP:
        # Keep only reads strictly shorter than the 99.95th percentile,
        # i.e. drop the longest ~0.05% so outliers do not stretch the x axis.
        seq_lengths = seq_lengths[seq_lengths < seq_lengths.quantile(0.9995)]

    # Computed once here; the tick formatter below is invoked for every tick.
    total_bases = seq_lengths.sum()

    def y_hist_to_human_readable_seq(y, position):
        """Format a y tick value as a human-readable size string.

        ``position`` is required by the ``FuncFormatter`` callback signature
        and is not used.
        """
        if y == 0:
            return 0
        # NOTE(review): the histogram is drawn with density=True, so ``y`` is
        # a density (bin share divided by bin width). Multiplying by the total
        # only, without the bin width, may not equal "bases per bin" - confirm
        # the intended scaling.
        s = humanfriendly.format_size(total_bases * y, binary=False)
        return reformat_human_friendly(s)

    # Define how many plots we want (1)
    fig, ax = plt.subplots(1)
    # Set the axis formatters
    ax.yaxis.set_major_formatter(FuncFormatter(y_hist_to_human_readable_seq))
    ax.xaxis.set_major_formatter(FuncFormatter(x_hist_to_human_readable))
    # Plot the histogram. ``density=True`` replaces the ``normed`` keyword,
    # which newer matplotlib releases no longer accept.
    _, bin_edges, _ = ax.hist(seq_lengths, num_bins, weights=seq_lengths,
                              density=True, facecolor='blue', alpha=0.76)
    bin_width = reformat_human_friendly(
        humanfriendly.format_size(bin_edges[1] - bin_edges[0], binary=False))
    # Set the titles and axis labels
    ax.set_title(f"Read Distribution Graph for {SAMPLE_NAME}")
    ax.grid(color='black', linestyle=':', linewidth=0.5)
    ax.set_xlabel(f"Read length: Bin Widths={bin_width}")
    ax.set_ylabel("Bases per bin")
    # Ensure labels are not missed.
    fig.tight_layout()
    savefig(os.path.join(PLOTS_DIR, f"{SAMPLE_NAME.replace(' ', '_')}_hist_read_length_by_basepair.png"))
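# Scraped web-page navigation text ("Comment list", "Article table of
# contents") - not part of the program.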