python类despine()的实例源码

figs.py 文件源码 项目:extract 作者: dblalock 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def makeFig1():
    """Draw the three-panel "Fig 1" comparison and show it on screen.

    The top panel spans both grid columns and shows the time series returned
    by ``getFig1Ts``; the bottom-left and bottom-right panels show the results
    of ``labelTs_sota`` and ``labelTs_ff`` applied to that same series.
    """
    ts = getFig1Ts()

    # 2x2 grid: one wide panel on top, two half-width panels underneath
    grid = (2, 2)
    ax1 = plt.subplot2grid(grid, (0, 0), colspan=2)
    ax2 = plt.subplot2grid(grid, (1, 0))
    ax3 = plt.subplot2grid(grid, (1, 1))

    for panel in (ax1, ax2, ax3):
        panel.autoscale(tight=True)
        sb.despine(left=True, ax=panel)

    # top: the unlabeled series
    ts.plot(showLabels=False, showBounds=False, ax=ax1)

    # bottom left: labeling with a single candidate length
    ts_sota = labelTs_sota(ts, [150])
    ts_sota.plot(showLabels=False, ax=ax2)

    # bottom right: labeling with a (Lmin, Lmax) length range
    Lmin, Lmax = 100, 200
    ts_ff = labelTs_ff(ts, Lmin, Lmax)
    ts_ff.plot(showLabels=False, ax=ax3)

    # hide the y tick labels of the right-hand panel
    plt.setp(ax3.get_yticklabels(), visible=False)

    ax1.set_title("Patterns in Dishwasher Dataset")
    ax1.set_xlabel("Minute")
    ax2.set_title("State-of-the-art")
    ax3.set_title("Proposed")

    plt.tight_layout()
    plt.show()
analysis.py 文件源码 项目:crop-seq 作者: epigen 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def enrich_signature(method="pca", percentile=99, results_dir="results", experiment="CROP-seq_Jurkat_TCR", n_genes=500):
    """
    Run Enrichr on the strongest genes of a differential expression signature
    and plot the top terms of selected gene set libraries.

    Genes whose absolute signature value exceeds the given percentile are
    passed to ``enrichr``. The full result table is written as CSV and a bar
    plot of the best-scoring terms per library is saved as SVG, both inside
    ``results_dir``.

    :param method: Name of the method that produced the signature; used in file names.
    :param percentile: Percentile of the absolute signature values above which a gene is selected.
    :param results_dir: Directory the signature is read from and outputs are written to.
    :param experiment: Experiment name used as prefix of the signature file name.
    :param n_genes: Currently unused.
    """
    signature_csv = os.path.join(results_dir, "{}.differential_expression.{}.stimutation.csv".format(experiment, method))
    # `.squeeze("columns")` is the documented replacement for the removed
    # `squeeze=True` keyword of `read_csv`: a one-column frame becomes a Series.
    diff = pd.read_csv(signature_csv, index_col=0, header=None, names=["gene_name"]).squeeze("columns")
    magnitude = abs(diff)
    degs = pd.Series(diff[magnitude > np.percentile(magnitude, percentile)].index)
    degs.name = "gene_name"

    enr = enrichr(degs.reset_index())
    enr.to_csv(os.path.join(results_dir, "differential_expression.{}.enrichr.csv".format(method)), index=False, encoding="utf8")

    # Plot top N terms of each library
    n = 8

    to_plot = [
        'GO_Biological_Process_2015',
        "KEGG_2016",
        "WikiPathways_2016",
        "Reactome_2016",
        "BioCarta_2016",
        "NCI-Nature_2016"]

    # Row labels of the n best-scoring terms within each selected library
    # (level 1 of the grouped index holds the original row labels of `enr`).
    in_libraries = enr[enr['gene_set_library'].isin(to_plot)]
    top_rows = in_libraries.groupby("gene_set_library")['combined_score'].nlargest(n).index.get_level_values(1)
    # `.loc` replaces the removed `.ix` indexer; the selection is label-based.
    p = enr.loc[top_rows].sort_values("combined_score", ascending=False)

    fig, axis = plt.subplots(1)
    sns.barplot(data=p, y="description", x="combined_score", orient="h", hue="gene_set_library", ax=axis)
    axis.set_xlabel("Combined score")
    sns.despine(fig)
    fig.savefig(os.path.join(results_dir, "differential_expression.{}.enrichr.top{}_terms.svg".format(method, n)), bbox_inches="tight")
screen_dynamics.py 文件源码 项目:crop-seq 作者: epigen 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def gRNA_scatter(s1, s2, prefix="", text=False, n_labels=30):
    """
    Scatter the log2 gRNA counts of each screen against a baseline sample.

    One panel is drawn per column of ``s2`` (in reverse column order) on a
    3x2 grid. The baseline column is "gDNA_Jurkat" when ``prefix`` starts
    with "mid_screen-" and "plasmid_pool_TCR" otherwise. The figure is saved
    as SVG and PDF inside the module-level ``results_dir``.

    :param s1: DataFrame indexed by gRNA name, holding the baseline columns.
    :param s2: DataFrame indexed by gRNA name, one column per screen.
    :param prefix: Label inserted in the output file names; also selects the baseline.
    :param text: If True, label the gRNAs whose name matches ETS1, GATA3 or RUNX1.
    :param n_labels: Currently unused.
    """
    # Scatter of gRNA change
    fig, axis = plt.subplots(3, 2, sharex=False, sharey=False, figsize=(8, 8))
    axis = axis.flatten()

    # Loop invariants: the joined table and the color palette
    joined = s1.join(s2)  # .fillna(0)
    palette = sns.color_palette("colorblind")

    for i, screen in enumerate(s2.columns[::-1]):
        # shuffle rows so that no gRNA group is systematically drawn on top
        x = joined.iloc[np.random.permutation(len(joined))]

        # `.loc` replaces the removed `.ix` indexer; the selection is label-based
        x = x.loc[x.index[~x.index.str.contains("Wnt")]]
        if prefix.startswith("mid_screen-"):
            b = x["gDNA_Jurkat"]
        else:
            b = x["plasmid_pool_TCR"]
        x = x.fillna(0)
        b = b.fillna(0)

        # One boolean column per group, keyed by its color; each gRNA gets the
        # color of the first group whose pattern its name contains.
        colors = pd.DataFrame()
        colors[palette[0]] = x.index.str.contains("Wnt")
        colors[palette[1]] = x.index.str.contains("CTRL")
        colors[palette[2]] = x.index.str.contains("Tcr")
        colors[palette[3]] = x.index.str.contains("Ess")
        colors = colors.apply(lambda row: row[row].index.tolist()[0], axis=1).tolist()

        log_screen = np.log2(1 + x[screen])
        log_baseline = np.log2(1 + b)

        axis[i].scatter(log_screen, log_baseline, color=colors, alpha=0.5)
        if text:
            for j in x[x.index.str.contains("ETS1|GATA3|RUNX1")].index:
                axis[i].text(log_screen.loc[j], log_baseline.loc[j], j)

        # x = y line
        lims = [np.nanmin([log_screen, log_baseline]), np.nanmax([log_screen, log_baseline])]
        axis[i].plot((lims[0], lims[1]), (lims[0], lims[1]), linestyle='--', color='black', alpha=0.75)

        axis[i].set_title(screen)
    for i in range(0, len(axis), 2):
        axis[i].set_ylabel("gRNA frequency in plasmid (log2)")
    for ax in axis[-2:]:
        ax.set_xlabel("gRNA frequency in CROP-seq screen (log2)")
    sns.despine(fig)
    fig.savefig(os.path.join(results_dir, "gRNA_counts.norm.{}.scatter.{}svg".format(prefix, "text." if text else "")), bbox_inches="tight")
    fig.savefig(os.path.join(results_dir, "gRNA_counts.norm.{}.scatter.{}pdf".format(prefix, "text." if text else "")), bbox_inches="tight")
dendrogram.py 文件源码 项目:IgDiscover 作者: NBISweden 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def main(args):
    """Plot a dendrogram of the sequences in ``args.fasta`` to ``args.plot``.

    When ``args.mark`` is given, every sequence that is not found in that
    second FASTA file gets " (new)" appended to its label. With fewer than
    two sequences a placeholder text is drawn instead of a dendrogram.
    """
    with FastaReader(args.fasta) as reader:
        sequences = list(reader)
    logger.info('Plotting dendrogram of %s sequences', len(sequences))

    if not args.mark:
        labels = [seq.name for seq in sequences]
    else:
        with FastaReader(args.mark) as reader:
            mark = PrefixComparer(record.sequence for record in reader)
        labels = []
        n_new = 0
        for record in sequences:
            suffix = ''
            if record.sequence not in mark:
                suffix = ' (new)'
                n_new += 1
            labels.append(record.name + suffix)
        logger.info('%s sequence(s) marked as "new"', n_new)

    sns.set_style("white")

    # Derive the leaf font size from the label count, clamp it to [6, 16],
    # then derive the figure height from the clamped size.
    n_rows = len(labels) + 5
    font_size = 297 / 25.4 * 72 / n_rows
    font_size = min(16, max(6, font_size))
    height = font_size * n_rows / 72

    fig = plt.figure(figsize=(210 / 25.4, height))
    matplotlib.rcParams.update({'font.size': 4})
    ax = fig.gca()
    sns.despine(ax=ax, top=True, right=True, left=True, bottom=True)
    sns.set_style('whitegrid')

    if len(sequences) < 2:
        ax.text(0.5, 0.5, 'no sequences', fontsize='xx-large')
    else:
        dist_matrix = distances([seq.sequence for seq in sequences])
        condensed = distance.squareform(dist_matrix)
        smallest = condensed.min()
        mindist = int(smallest)
        logger.info('Smallest distance is %s. Found between:', mindist)
        # report each closest pair once (upper triangle only)
        for row, col in np.argwhere(dist_matrix == smallest):
            if row < col:
                logger.info('%s and %s', labels[row], labels[col])
        linkage = hierarchy.linkage(condensed, method=args.method)
        threshold = 0.95*max(linkage[:,2])
        hierarchy.dendrogram(linkage, labels=labels, leaf_font_size=font_size, orientation='right', color_threshold=threshold)

    ax.grid(False)
    fig.set_tight_layout(True)
    fig.savefig(args.plot)
plots.py 文件源码 项目:AlphaPy 作者: ScottFreeLLC 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def plot_box(df, x, y, hue, tag='eda', directory=None):
    r"""Render a seaborn box plot and pass its figure to ``write_plot``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data source holding the ``x``, ``y`` and ``hue`` columns.
    x : str
        Column of ``df`` plotted along the x-axis.
    y : str
        Column of ``df`` plotted along the y-axis.
    hue : str
        Column of ``df`` used to split the boxes into colored groups.
    tag : str
        Unique identifier for the plot.
    directory : str, optional
        The full specification of the plot location.

    Returns
    -------
    None : None.

    References
    ----------

    http://seaborn.pydata.org/generated/seaborn.boxplot.html

    """

    logger.info("Generating Box Plot")

    # Draw the boxes, then offset and trim the remaining spines

    axes = sns.boxplot(data=df, x=x, y=y, hue=hue)
    sns.despine(trim=True, offset=10)

    # Save the figure that owns the axes

    write_plot('seaborn', axes.get_figure(), 'box_plot', tag, directory)


#
# Function plot_swarm
#
visualize.py 文件源码 项目:wasabiplot 作者: olgabot 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def wasabiplot(bam_filename, chrom, start, stop, strand, log_base=10,
               color='steelblue', bad_cigar=INSERTION_DELETIONS,
               coverage_cigar=COVERAGE_CIGAR, junction_cigar=JUNCTION_CIGAR,
               ax=None, coverage_kws=None, curve_height_multiplier=0.2,
               text_kws=TEXT_KWS, patch_kws=PATCH_KWS, warn_skipped=True,
               annotate=True, **kwargs):
    """Plot read coverage and junctions of a genomic region onto an axes

    Parameters
    ----------
    bam_filename : str or pandas.Series
        Name of the bam file. If a Series is given, its first element is used
    chrom : str
        Name of the reference chromosome
    start, stop : int
        Genome-based locations of the start and stop regions
    strand : '+' | '-'
        Strand to query
    log_base : number or None, optional
        The base to use for log-scaling the data. e.g. 10 would have log10 data
        If None, the data is not log-scaled. (default=10)
    color : valid matplotlib color
        Color to use for both the coverage and junction plotting
    bad_cigar : tuple of str, optional
        Which CIGAR string flags are not allowed. (default=('I', 'D') aka
        insertion and deletion)
    coverage_cigar, junction_cigar : optional
        Passed through unchanged to ``WasabiPlotter``
    ax : matplotlib axes, optional
        Axes to draw on. If None, the current axes (``plt.gca()``) is used
    coverage_kws : dict or None, optional
        Keyword arguments for ``WasabiPlotter.plot_coverage``. The dict is
        copied, never modified
    curve_height_multiplier : number, optional
        Passed to ``WasabiPlotter.plot_junctions`` (default=0.2)
    text_kws, patch_kws : dict, optional
        Passed to ``WasabiPlotter.plot_junctions``
    warn_skipped : bool, optional
        Passed through unchanged to ``WasabiPlotter`` (default=True)
    annotate : bool, optional
        Passed to ``WasabiPlotter.plot_junctions`` (default=True)
    **kwargs
        Additional keyword arguments for ``WasabiPlotter.plot_coverage``;
        they take precedence over entries of ``coverage_kws``
    """
    if isinstance(bam_filename, pd.Series):
        bam_filename = bam_filename.iloc[0]

    plotter = WasabiPlotter(bam_filename, chrom, start, stop, strand, log_base,
                            color, bad_cigar, coverage_cigar, junction_cigar,
                            warn_skipped)

    if ax is None:
        ax = plt.gca()

    # Work on a copy: updating the caller's dict in place would leak ``kwargs``
    # from this call into every later call that reuses the same dict.
    coverage_kws = {} if coverage_kws is None else dict(coverage_kws)
    coverage_kws.update(kwargs)

    plotter.plot_coverage(color, ax, **coverage_kws)
    plotter.plot_junctions(ax, curve_height_multiplier=curve_height_multiplier,
                           text_kws=text_kws, patch_kws=patch_kws,
                           annotate=annotate)

    # Remove bottom spine
    sns.despine(ax=ax, bottom=True)

    # Add a zero-axis line
    ax.hlines(0, 0, plotter.length, linewidth=0.5, zorder=-1)

    # Relabel the x ticks of bottom-row axes with genome coordinates
    # (tick positions are offsets relative to ``start``).
    # NOTE(review): ``Axes.is_last_row`` appears to have been removed in recent
    # matplotlib releases in favor of ``ax.get_subplotspec().is_last_row()`` --
    # confirm against the supported matplotlib version.
    if ax.is_last_row():
        xticks = [int(x + start) for x in ax.get_xticks()]
        ax.set(xticklabels=xticks)
bars.py 文件源码 项目:astetik 作者: mikkokotila 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def bars(data,color='black',title=''):
    """Draw a bar chart of the most frequent values in ``data``.

    The values are counted with ``value_counts``, the first row of the count
    table is discarded, and up to 20 of the remaining values are drawn as
    bars annotated with their count.

    Parameters
    ----------
    data : pandas.Series
        The values to count.
    color : valid matplotlib color
        Color of bars with a non-negative value; other bars are drawn in red.
    title : str
        Title shown above the plot.
    """

    data = pd.DataFrame(data.value_counts())
    data = data.reset_index()
    data.columns = ['keyword','value']
    # Assigning the slice leaves NaN in the first row (assignment aligns on
    # the index), so the dropna() below removes that row.
    data['keyword'] = data['keyword'][1:]
    data = data.dropna()
    data = data.reset_index(drop=True)
    data = data.sort_values('value',ascending=False)

    sns.set_context("notebook", font_scale=1.2, rc={"lines.linewidth": 0})

    top = data.head(20)
    x = top['keyword'].astype(str)
    y = top['value'].astype(int)

    f, ax = plt.subplots(figsize=(16, 3))

    sns.set_style('white')

    ## change color of the bar based on value

    colors = [color if _y >= 0 else 'red' for _y in y]

    # x and y are passed by keyword: recent seaborn releases no longer accept
    # them positionally, while the keyword form works on every version.
    sns.barplot(x=x, y=y, palette=colors, ax=ax)

    plt.title(title, fontsize=18, y=1.12, color="gray")

    ax.set_xticklabels('')
    ax.set_ylabel('')
    ax.set_xlabel('')
    ax.tick_params(axis='both', which='major', pad=30)

    for n, _y in enumerate(y):
        # The annotation text is passed positionally: matplotlib renamed this
        # parameter from ``s`` to ``text``, so neither keyword is portable.
        ax.annotate(
            '{:.1f}'.format(abs(_y)),
            xy=(n, _y),
            ha='center',va='center',
            xytext=(0,-10),
            size=12,
            textcoords='offset points',
            color="white",
            weight="bold"
        )
    ax.set_yticklabels("")
    ax.set_xticklabels(top['keyword'],rotation=25,ha="right")
    ax.tick_params(axis='both', which='major', pad=15)
    sns.despine(left=True)
chart.py 文件源码 项目:pygcam 作者: JGCRI 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def plotStackedBarsScalar(df, indexCol, columns, valuesCol, box=False, rotation=90,
                          zeroLine=False, title="", xlabel='', ylabel='', ncol=5, ygrid=False,
                          yticks=False, ymin=None, ymax=None, barWidth=0.5, legendY=None,
                          palette=None, outFile=None, sideLabel=False, labelColor=None,
                          yFormat=None, transparent=False, openFile=False, closeFig=True):
    '''
    Plot a stacked bar plot using data in df, given the index column, the
    column holding the values to pivot to columns, and the column holding
    the values. The argument 'ncol' specifies the number of columns with
    which to render the legend.

    Other arguments: 'box' keeps the full axes frame when true (otherwise the
    top, right and left spines are removed); 'rotation' is the x tick label
    rotation; 'zeroLine' draws a horizontal line at y=0; 'ygrid' turns on the
    horizontal grid; 'yticks' draws outward y ticks on the left side only;
    'ymin'/'ymax' fix the y limits; 'legendY' is the vertical legend anchor
    (default -0.6). The remaining arguments are passed to _finalizeFigure.

    Returns the tuple (fig, ax).
    '''
    #_logger.debug('plotStackedBarsScalar %s', sideLabel)
    setupPlot()

    # TBD: handle year values as columns to plot
    df2 = df[[indexCol, columns, valuesCol]].pivot(index=indexCol, columns=columns, values=valuesCol)

    setupPalette(len(df2.columns), pal=palette)

    fig, ax = plt.subplots(1, 1, figsize=(8, 4))
    df2.plot(kind='bar', stacked=True, ax=ax, grid=False, width=barWidth, rot=rotation)

    if not box:
        sns.despine(left=True)

    if yticks:
        # Real booleans: matplotlib no longer converts the strings 'on'/'off',
        # so right='off' would be truthy and turn the right-hand ticks ON.
        plt.tick_params(axis='y', direction='out', length=5, width=.75,
                        colors='k', left=True, right=False)

    if zeroLine:
        ax.axhline(0, color='k', linewidth=0.75, linestyle='-')

    if ygrid:
        ax.yaxis.grid(color='lightgrey', linestyle='solid')

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    legendY = -0.6 if legendY is None else legendY
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, legendY), ncol=ncol)

    if title:
        ax.set_title(title, y=1.05)

    if ymin is not None or ymax is not None:
        ax.set_autoscale_on(False)
        ax.set_ylim(ymin, ymax)

    _finalizeFigure(fig, ax, outFile=outFile, sideLabel=sideLabel, labelColor=labelColor,
                    yFormat=yFormat, transparent=transparent, openFile=openFile, closeFig=closeFig)

    return (fig, ax)
chart.py 文件源码 项目:pygcam 作者: JGCRI 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def plotStackedTimeSeries(df, index='region', xlabel='', ylabel='', ncol=5, box=False,
                          zeroLine=False, title="", ygrid=False, yticks=False,
                          ymin=None, ymax=None, barWidth=0.5, legendY=None, yearStep=5,
                          palette=None, outFile=None, sideLabel=False, labelColor=None,
                          yFormat=None, transparent=False, openFile=False, closeFig=True):
    '''
    Plot a stacked bar chart with one bar per year column of df and one
    stacked segment per value of the 'index' column; rows sharing an index
    value are summed.

    When the year columns are annual and 'yearStep' is greater than 1, only
    every 'yearStep'-th year is labeled on the x axis. 'box' keeps the full
    axes frame when true; 'zeroLine' draws a horizontal line at y=0; 'ygrid'
    turns on the horizontal grid; 'yticks' draws outward y ticks on the left
    side only; 'ymin'/'ymax' fix the y limits; 'ncol' is the number of legend
    columns and 'legendY' its vertical anchor (default -0.2). The remaining
    arguments are passed to _finalizeFigure.

    Returns the tuple (fig, ax).
    '''
    #_logger.debug('plotStackedTimeSeries %s', sideLabel)
    setupPlot()
    df = dropExtraCols(df, inplace=False)
    grouped = df.groupby(index)
    df2 = grouped.aggregate(np.sum)
    df3 = df2.transpose()

    setupPalette(len(df3.columns), pal=palette)
    fig, ax = plt.subplots(1, 1, figsize=(8, 4))
    df3.plot(kind='bar', stacked=True, ax=ax, grid=False, width=barWidth)

    # space out year labels to every 5 years
    locs, labels = plt.xticks()
    # Materialize the list: on Python 3 filter() returns an iterator, which
    # can be neither indexed nor sliced.
    yearCols = list(filter(str.isdigit, df.columns))

    # Thinning the labels only makes sense for annual data, which takes at
    # least two year columns to detect.
    if len(yearCols) > 1 and int(yearCols[1]) - int(yearCols[0]) == 1 and yearStep > 1:
        plt.xticks(locs[::yearStep], yearCols[::yearStep])

    if not box:
        sns.despine(left=True)

    if yticks:
        # Real booleans: matplotlib no longer converts the strings 'on'/'off',
        # so right='off' would be truthy and turn the right-hand ticks ON.
        plt.tick_params(axis='y', direction='out', length=5, width=.75,
                        colors='k', left=True, right=False)

    lines = ax.get_lines()
    if lines:
        lines[0].set_visible(False)    # get rid of ugly dashed line

    if zeroLine:
        ax.axhline(0, color='k', linewidth=0.75, linestyle='-')

    if ygrid:
        ax.yaxis.grid(color='lightgrey', linestyle='solid')

    if ymin is not None or ymax is not None:
        ax.set_autoscale_on(False)
        ax.set_ylim(ymin, ymax)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    legendY = -0.2 if legendY is None else legendY
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, legendY), ncol=ncol)

    if title:
        ax.set_title(title, y=1.05)

    _finalizeFigure(fig, ax, outFile=outFile, sideLabel=sideLabel, labelColor=labelColor,
                    yFormat=yFormat, transparent=transparent, openFile=openFile, closeFig=closeFig)

    return (fig, ax)
chart.py 文件源码 项目:pygcam 作者: JGCRI 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def plotTimeSeries(df, xlabel='', ylabel='', box=False, zeroLine=False, title="", ygrid=False,
                   yticks=False, ymin=None, ymax=None, legend=False, legendY=None, yearStep=5,
                   outFile=None, sideLabel=False, labelColor=None, yFormat=None, transparent=False,
                   openFile=False, closeFig=True):
    '''
    Plot the first row of df as a line, using the columns whose names
    consist of digits as the x (year) values.

    'box' keeps the full axes frame when true; 'zeroLine' draws a horizontal
    line at y=0; 'ygrid' turns on the horizontal grid; 'yticks' draws outward
    y ticks on the left side only; 'ymin'/'ymax' fix the y limits; 'legend'
    shows a legend anchored vertically at 'legendY' (default -0.2). 'yearStep'
    is currently unused. The remaining arguments are passed to _finalizeFigure.

    Returns the tuple (fig, ax).
    '''
    setupPlot()
    fig, ax = plt.subplots(1, 1, figsize=(8, 4))

    # Materialize both sequences: on Python 3 filter() and map() return
    # one-shot iterators, but yearCols is consumed twice (once for x, once to
    # select the columns) and x must be a real sequence for plotting.
    yearCols = list(filter(str.isdigit, df.columns))
    x = [int(year) for year in yearCols]
    y = list(df[yearCols].iloc[0])
    plt.plot(x, y)

    # TBD: see if this is worth doing
    # space out year labels to every 5 years
    #locs, labels = plt.xticks()
    #plt.xticks(locs[::yearStep], yearCols[::yearStep])

    if not box:
        sns.despine(left=True)

    if yticks:
        # Real booleans: matplotlib no longer converts the strings 'on'/'off',
        # so right='off' would be truthy and turn the right-hand ticks ON.
        plt.tick_params(axis='y', direction='out', length=5, width=.75,
                        colors='k', left=True, right=False)

    if zeroLine:
        ax.axhline(0, color='k', linewidth=0.75, linestyle='-')

    if ygrid:
        ax.yaxis.grid(color='lightgrey', linestyle='solid')

    if ymin is not None or ymax is not None:
        ax.set_autoscale_on(False)
        ax.set_ylim(ymin, ymax)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    if legend:
        legendY = -0.2 if legendY is None else legendY
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, legendY))
    else:
        ax.legend([], frameon=False)

    if title:
        ax.set_title(title, y=1.05)

    _finalizeFigure(fig, ax, outFile=outFile, sideLabel=sideLabel, labelColor=labelColor,
                    yFormat=yFormat, transparent=transparent, openFile=openFile, closeFig=closeFig)

    return (fig, ax)
screen_dynamics.py 文件源码 项目:crop-seq 作者: epigen 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def gRNA_maplot(s1, s2, prefix="", text=False, n_labels=30):
    """
    Draw one scatter panel per screen of the log2 fold-change of each gRNA
    against half the log2 of the product of its screen and baseline counts.

    One panel is drawn per column of ``s2`` (in reverse column order) on a
    3x2 grid with shared axes. The baseline column is "gDNA_Jurkat" when
    ``prefix`` starts with "mid_screen-" and "plasmid_pool_TCR" otherwise.
    The figure is saved as SVG and PDF inside the module-level ``results_dir``.

    :param s1: DataFrame indexed by gRNA name, holding the baseline columns.
    :param s2: DataFrame indexed by gRNA name, one column per screen.
    :param prefix: Label inserted in the output file names; also selects the baseline.
    :param text: If True, label the gRNAs whose name matches ETS1, GATA3 or RUNX1.
    :param n_labels: Currently unused.
    """
    # Rank of gRNA change
    fig, axis = plt.subplots(3, 2, sharex=True, sharey=True, figsize=(8, 8))
    axis = axis.flatten()

    # Loop invariants: the joined table and the color palette
    joined = s1.join(s2)  # .fillna(0)
    palette = sns.color_palette("colorblind")

    for i, screen in enumerate(s2.columns[::-1]):
        # shuffle rows so that no gRNA group is systematically drawn on top
        x = joined.iloc[np.random.permutation(len(joined))]

        # `.loc` replaces the removed `.ix` indexer; the selection is label-based
        x = x.loc[x.index[~x.index.str.contains("Wnt")]]
        if prefix.startswith("mid_screen-"):
            b = x["gDNA_Jurkat"]
        else:
            b = x["plasmid_pool_TCR"]
        x = x.fillna(0)
        b = b.fillna(0)

        # Horizontal coordinate; infinities from zero counts are replaced by
        # fixed values so that those points can still be drawn.
        M = np.log2(x[screen] * b) / 2.
        M = M.replace({-np.inf: 0, np.inf: 9})
        fc = np.log2(1 + x[screen]) - np.log2(1 + b)

        # One boolean column per group, keyed by its color; each gRNA gets the
        # color of the first group whose pattern its name contains.
        colors = pd.DataFrame()
        colors[palette[0]] = x.index.str.contains("Wnt")
        colors[palette[1]] = x.index.str.contains("CTRL")
        colors[palette[2]] = x.index.str.contains("Tcr")
        colors[palette[3]] = x.index.str.contains("Ess")
        colors = colors.apply(lambda row: row[row].index.tolist()[0], axis=1).tolist()

        axis[i].scatter(M, fc, color=colors, alpha=0.5)
        if text:
            for j in x[x.index.str.contains("ETS1|GATA3|RUNX1")].index:
                axis[i].text(
                    M.loc[j],
                    fc.loc[j],
                    j)

        axis[i].axhline(y=0, color='black', linestyle='--', lw=0.5)

        axis[i].set_title(screen)

    for i in range(0, len(axis), 2):
        axis[i].set_ylabel("M")
    for ax in axis[-2:]:
        ax.set_xlabel("A")
    sns.despine(fig)
    fig.savefig(os.path.join(results_dir, "gRNA_counts.norm.{}.maplot.{}svg".format(prefix, "text." if text else "")), bbox_inches="tight")
    fig.savefig(os.path.join(results_dir, "gRNA_counts.norm.{}.maplot.{}pdf".format(prefix, "text." if text else "")), bbox_inches="tight")
screen_dynamics.py 文件源码 项目:crop-seq 作者: epigen 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def gRNA_rank(s1, s2, prefix="", text=False, n_labels=30):
    """
    Plot the log2 fold-change of each gRNA against its rank, one panel per
    screen, and save the fold-change tables.

    One panel is drawn per column of ``s2`` (in reverse column order) on a
    3x2 grid with shared axes. The baseline column is "gDNA_Jurkat" when
    ``prefix`` starts with "mid_screen-" and "plasmid_pool_TCR" otherwise.
    The figure (SVG and PDF), the per-gRNA fold-changes and the per-gene mean
    and minimum fold-changes (CSV) are written inside the module-level
    ``results_dir``. Gene names come from the module-level ``guide_annotation``.

    :param s1: DataFrame indexed by gRNA name, holding the baseline columns.
    :param s2: DataFrame indexed by gRNA name, one column per screen.
    :param prefix: Label inserted in the output file names; also selects the baseline.
    :param text: If True, label the gRNAs whose name matches ETS1, GATA3 or RUNX1.
    :param n_labels: Currently unused.
    """
    # Rank of gRNA change
    fig, axis = plt.subplots(3, 2, sharex=True, sharey=True, figsize=(8, 8))
    axis = axis.flatten()

    # Loop invariants: the joined table and the color palette
    joined = s1.join(s2)  # .fillna(0)
    palette = sns.color_palette("colorblind")

    for i, screen in enumerate(s2.columns[::-1]):
        # shuffle rows so that no gRNA group is systematically drawn on top
        x = joined.iloc[np.random.permutation(len(joined))]

        # `.loc` replaces the removed `.ix` indexer; the selection is label-based
        x = x.loc[x.index[~x.index.str.contains("Wnt")]]
        if prefix.startswith("mid_screen-"):
            b = x["gDNA_Jurkat"]
        else:
            b = x["plasmid_pool_TCR"]

        x = x.fillna(0)
        b = b.fillna(0)

        fc = np.log2(1 + x[screen]) - np.log2(1 + b)

        # collect the fold-changes of every screen in one table
        fc.name = screen
        if i == 0:
            xx = pd.DataFrame(fc)
        else:
            xx = xx.join(fc, how="outer")

        # One boolean column per group, keyed by its color; each gRNA gets the
        # color of the first group whose pattern its name contains.
        colors = pd.DataFrame()
        colors[palette[0]] = x.index.str.contains("Wnt")
        colors[palette[1]] = x.index.str.contains("CTRL")
        colors[palette[2]] = x.index.str.contains("Tcr")
        colors[palette[3]] = x.index.str.contains("Ess")
        colors = colors.apply(lambda row: row[row].index.tolist()[0], axis=1).tolist()

        # rank 1 = largest fold-change; computed once and reused for the labels
        ranks = fc.rank(ascending=False, method="first")

        axis[i].scatter(ranks, fc, color=colors, alpha=0.5)
        if text:
            for j in x[x.index.str.contains("ETS1|GATA3|RUNX1")].index:
                axis[i].text(
                    ranks.loc[j],
                    fc.loc[j],
                    j)
        axis[i].axhline(y=0, color='black', linestyle='--', lw=0.5)

        axis[i].set_title(screen)

    for i in range(0, len(axis), 2):
        axis[i].set_ylabel("gRNA fold-change")
    for ax in axis[-2:]:
        ax.set_xlabel("gRNA rank")
    sns.despine(fig)
    fig.savefig(os.path.join(results_dir, "gRNA_counts.norm.{}.rank.{}svg".format(prefix, "text." if text else "")), bbox_inches="tight")
    fig.savefig(os.path.join(results_dir, "gRNA_counts.norm.{}.rank.{}pdf".format(prefix, "text." if text else "")), bbox_inches="tight")

    # Save ranked list
    xx.to_csv(os.path.join(results_dir, "gRNA_counts.norm.{}.rank.csv".format(prefix)), index=True)

    # Save ranked list of gene-level measurements, reduced by mean and min
    m = pd.merge(xx.reset_index(), guide_annotation[["oligo_name", "gene"]], left_on="gRNA_name", right_on="oligo_name").drop("oligo_name", axis=1).set_index(["gene", "gRNA_name"])
    m.groupby(level=[0]).mean().to_csv(os.path.join(results_dir, "gRNA_counts.norm.{}.gene_mean.rank.csv".format(prefix)), index=True)
    m.groupby(level=[0]).min().to_csv(os.path.join(results_dir, "gRNA_counts.norm.{}.gene_min.rank.csv".format(prefix)), index=True)
screen_dynamics.py 文件源码 项目:crop-seq 作者: epigen 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def gRNA_rank_stimulus(xx, s2, prefix=""):
    """
    Plot, per pair of screens, the difference in gRNA log2 fold-change
    between the two screens of the pair against its rank.

    The fold-change of every column of ``s2`` (in reverse column order)
    relative to a baseline column is computed first; consecutive columns are
    then paired and the first is subtracted from the second. The figure (SVG)
    and the table of differences (CSV) are written inside the module-level
    ``results_dir``.

    :param xx: Not used: the fold-change table is rebuilt inside the function.
    :param s2: DataFrame indexed by gRNA name, one column per screen.
    :param prefix: Label inserted in the output file names; also selects the baseline.
    """
    # Difference between unstimulated/stimulated
    fig, axis = plt.subplots(1, 3, sharex=False, sharey=True, figsize=(12, 3))
    axis = axis.flatten()

    for i, screen in enumerate(s2.columns[::-1]):
        # NOTE(review): `s1` is not a parameter of this function; it is looked
        # up as a module-level name -- confirm it is defined by the caller's module.
        x = s1.join(s2)  # .fillna(0)
        x = x.iloc[np.random.permutation(len(x))]

        # NOTE(review): if `screen` matches neither branch, `b` keeps its value
        # from the previous iteration (or is undefined on the first one).
        # `.loc` replaces the removed `.ix` indexer; the selection is label-based.
        if ("TCR" in screen) or ("Jurkat" in screen):
            x = x.loc[x.index[~x.index.str.contains("Wnt")]]
            if prefix.startswith("mid_screen-"):
                b = x["gDNA_Jurkat"]
            else:
                b = x["plasmid_pool_TCR"]
        elif ("WNT" in screen) or ("HEK" in screen):
            x = x.loc[x.index[~x.index.str.contains("Tcr")]]
            if prefix.startswith("mid_screen-"):
                if "_4_" in prefix:
                    b = x["gDNA_HEKclone4"]
                else:
                    b = x["gDNA_HEKclone6"]
            else:
                b = x["plasmid_pool_WNT"]
        fc = np.log2(1 + x[screen]) - np.log2(1 + b)

        # collect the fold-changes of every screen in one table
        fc.name = screen
        if i == 0:
            xx = pd.DataFrame(fc)
        else:
            xx = xx.join(fc, how="outer")

    screens = s2.columns[::-1]
    # NOTE(review): panels are addressed as axis[i] with i = 0, 2, 4, ... while
    # only three axes exist; presumably axis[i // 2] was intended -- confirm.
    for i in range(0, len(s2.columns), 2):
        fc = (xx[screens[i + 1]] - xx[screens[i]]).dropna()

        fc.name = screens[i + 1]
        if i == 0:
            axis[i].set_ylabel("gRNA fold-change (stimulated / unstimulated)")
            xxx = pd.DataFrame(fc)
        else:
            xxx = xxx.join(fc, how="outer")

        # One boolean column per group, keyed by its color; each gRNA gets the
        # color of the first group whose pattern its name contains.
        colors = pd.DataFrame()
        colors[sns.color_palette("colorblind")[0]] = fc.index.str.contains("Wnt")
        colors[sns.color_palette("colorblind")[1]] = fc.index.str.contains("CTRL")
        colors[sns.color_palette("colorblind")[2]] = fc.index.str.contains("Tcr")
        colors[sns.color_palette("colorblind")[3]] = fc.index.str.contains("Ess")
        colors = colors.apply(lambda j: j[j].index.tolist()[0], axis=1).tolist()

        axis[i].scatter(fc.rank(ascending=False, method="first"), fc, color=colors, alpha=0.5)
        axis[i].axhline(y=0, color='black', linestyle='--', lw=0.5)
        axis[i].set_title(re.sub("_stimulated", "", screens[i + 1]))
        axis[i].set_xlabel("gRNA rank (stimulated / unstimulated)")

    sns.despine(fig)
    fig.savefig(os.path.join(results_dir, "gRNA_counts.norm.{}.rank.diff_condition.svg".format(prefix)), bbox_inches="tight")

    # expand=False keeps the result one-dimensional (an Index); the modern
    # default returns a DataFrame, which cannot be assigned as column labels.
    xxx.columns = xxx.columns.str.extract("(.*)_stimulated", expand=False)
    xxx.to_csv(os.path.join(results_dir, "gRNA_counts.norm.{}.rank.diff_condition.csv".format(prefix)), index=True)


问题


面经


文章

微信
公众号

扫码关注公众号