plot_yields.py 文件源码-python代码片段

def plot_read_length_hist():
    """Plot a base-weighted histogram of read lengths and save it as a PNG.

    Reads the ``seq_length`` column of the module-level ``ALL_READS`` and,
    when ``CLIP`` is truthy, discards reads at or above the 99.95th
    percentile of length. Each read is weighted by its own length, so the
    height of a bar is the total number of bases contributed by reads
    falling in that length bin.

    The figure is written to ``PLOTS_DIR`` as
    ``<SAMPLE_NAME with spaces replaced by underscores>_hist_read_length_by_basepair.png``.
    """
    # Close any previous plots
    plt.close('all')
    num_bins = 50
    seq_df = ALL_READS["seq_length"]
    if CLIP:
        # Drop the extreme tail (lengths at or above the 99.95th percentile);
        # presumably so a handful of very long reads do not stretch the x-axis.
        seq_df = seq_df[seq_df < seq_df.quantile(0.9995)]

    def y_hist_to_human_readable_seq(y, position):
        """Format a y tick (bases in one bin) as a human readable size."""
        if y == 0:
            return 0
        # Bar heights are already absolute base counts per bin (see the
        # hist call below), so no rescaling by the total is required.
        s = humanfriendly.format_size(y, binary=False)
        return reformat_human_friendly(s)

    # Define how many plots we want (1)
    fig, ax = plt.subplots(1)
    # Set the axis formatters
    ax.yaxis.set_major_formatter(FuncFormatter(y_hist_to_human_readable_seq))
    ax.xaxis.set_major_formatter(FuncFormatter(x_hist_to_human_readable))
    # Plot the histogram. Weighting by read length without normalisation
    # makes each bar's height the number of bases in that bin. The former
    # ``normed=1`` argument was removed in Matplotlib 3.1, and a normalised
    # histogram is a density (divided by the bin width), which does not match
    # the "Bases per bin" axis label.
    _, bin_edges, _ = ax.hist(seq_df, num_bins, weights=seq_df,
                              facecolor='blue', alpha=0.76)
    bin_width = reformat_human_friendly(
        humanfriendly.format_size(bin_edges[1] - bin_edges[0], binary=False))
    # Set the titles and axis labels
    ax.set_title(f"Read Distribution Graph for {SAMPLE_NAME}")
    ax.grid(color='black', linestyle=':', linewidth=0.5)
    ax.set_xlabel(f"Read length: Bin Widths={bin_width}")
    ax.set_ylabel("Bases per bin")
    # Ensure labels are not missed.
    fig.tight_layout()
    savefig(os.path.join(PLOTS_DIR, f"{SAMPLE_NAME.replace(' ', '_')}_hist_read_length_by_basepair.png"))