python类regplot()的实例源码

business_case_solver.py 文件源码 项目:themarketingtechnologist 作者: thomhopmans 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def visualize_results(self):
        """Draw the logistic regression of conversions on pageviews and show it.

        Reads ``self.df`` and plots column ``is_conversion`` against column
        ``pageviews_cumsum`` with a logistic fit, then labels the figure and
        opens it with ``show()``.
        """
        sns.set(style="darkgrid")

        # All regplot options gathered in one place; a small vertical jitter
        # is applied to the y values of the scatter points.
        regplot_options = {
            "x": "pageviews_cumsum",
            "y": "is_conversion",
            "data": self.df,
            "logistic": True,
            "n_boot": 500,
            "y_jitter": .01,
            "scatter_kws": {"s": 60},
        }
        sns.regplot(**regplot_options)
        sns.set(font_scale=1.3)

        # NOTE(review): pyplot is reached through seaborn's namespace; this
        # presumably only works on seaborn releases that still expose
        # `sns.plt` -- confirm against the pinned version.
        pyplot = sns.plt
        pyplot.title('Logistic Regression Curve')
        pyplot.ylabel('Conversion probability')
        pyplot.xlabel('Cumulative sum of pageviews')
        pyplot.subplots_adjust(right=0.93, top=0.90, left=0.10, bottom=0.10)
        pyplot.show()
samestats.py 文件源码 项目:same-stats-different-graphs 作者: jmatejka 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def show_scatter(df, xlim=(-5, 105), ylim=(-5, 105), color="black", marker="o", reg_fit=False):
    """Scatter-plot columns ``x`` and ``y`` of *df* on the current axes.

    Args:
        df (pd.DataFrame):      Data set to plot; columns "x" and "y" are used
        xlim ((float, float)):  Limits applied to the x-axis
        ylim ((float, float)):  Limits applied to the y-axis
        color (str):            Colour of the scatter points
        marker (str):           Marker style of the scatter points
        reg_fit (bool):         If true, a linear regression line is drawn too
    """
    point_style = {"s": 50, "alpha": 0.7, "color": color}
    line_style = {"linewidth": 4, "color": "red"}

    # ci=None: no confidence band is drawn around the regression line.
    sns.regplot(x="x", y="y", data=df, ci=None, fit_reg=reg_fit, marker=marker,
                scatter_kws=point_style, line_kws=line_style)

    plt.xlim(xlim)
    plt.ylim(ylim)
    plt.tight_layout()
regression_modeling.py 文件源码 项目:-Python-Analysis_of_wine_quality 作者: ekolik 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def basic_linear(wine_set):
    """Plot quality against volatile acidity and print a simple OLS summary.

    The function first shows a scatter plot with a regression line, then
    centres the ``volatile_acidity`` column and fits ``quality ~
    volatile_acidity`` with ``smf.ols``.

    Note: the ``volatile_acidity`` column of *wine_set* is overwritten with
    its mean-centred values, so the caller's object is modified.
    """
    explanatory = "volatile_acidity"

    seaborn.regplot(x=explanatory, y="quality", fit_reg=True, data=wine_set)
    plt.xlabel("Amount of volatile acidity in wine")
    plt.ylabel("Quality level of wine (0-10 scale)")
    plt.title("Association between the amount of volatile acidity in wine and the quality of wine")
    plt.show()

    # Centre the explanatory variable by subtracting its mean.
    acidity_mean = wine_set[explanatory].mean()
    print("mean of the volatile acidity variable = ", acidity_mean)
    wine_set[explanatory] = wine_set[explanatory] - acidity_mean
    print("mean of the volatile acidity variable after normalization = ", wine_set[explanatory].mean())

    print("\nOLS regression model for the association between the amount of volatile acidity in wine and the quality of wine:")
    fitted = smf.ols(formula="quality ~ volatile_acidity", data=wine_set).fit()
    print(fitted.summary())


# call(basic_linear)


# #___________________________________ Multiple Regression___________________________________________
base.py 文件源码 项目:auxiliary-deep-generative-models 作者: larsmaaloee 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def plot_eval(self, eval_dict, labels, path_extension=""):
        """
        Plot the loss function in a overall plot and a zoomed plot.

        The upper subplot shows every curve over all keys of ``eval_dict``;
        the lower subplot shows only the last quarter of the entries, with a
        regression line fitted to each curve. The figure is saved to the path
        returned by ``paths.get_plot_evaluation_path_for_model``.

        :param eval_dict: Mapping whose keys are used as x positions and whose
            values are equally long sequences holding one number per curve
            (presumably epoch -> evaluation values; confirm against caller).
        :param labels: Legend labels, indexed by curve number.
        :param path_extension: If the plot should be saved in an incremental way.
        """

        def plot(x, y, fit, label):
            # x/y are passed by keyword: newer seaborn releases no longer
            # accept them positionally.
            sns.regplot(x=np.array(x), y=np.array(y), fit_reg=fit, label=label, scatter_kws={"s": 5})

        # Materialise keys/values once. On Python 3 dict views can be neither
        # indexed nor sliced, which the plotting below requires.
        keys = list(eval_dict.keys())
        values = list(eval_dict.values())

        plt.clf()
        plt.subplot(211)
        idx = np.array(values[0]).shape[0]
        x = np.array(values)
        for i in range(idx):
            plot(keys, x[:, i], False, labels[i])
        plt.legend()
        plt.subplot(212)
        # Start of the last quarter; with fewer than four entries this is 0,
        # so the zoomed plot simply shows everything.
        tail = -int(len(x) * 0.25)
        for i in range(idx):
            plot(keys[tail:], x[tail:][:, i], True, labels[i])
        plt.xlabel('Epochs')
        plt.savefig(paths.get_plot_evaluation_path_for_model(self.model.get_root_path(), path_extension+".png"))
business_case_solver_without_classes.py 文件源码 项目:themarketingtechnologist 作者: thomhopmans 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def visualize_results(df):
    """Draw the logistic regression of conversions on pageviews and show it.

    Plots column ``is_conversion`` of *df* against column
    ``pageviews_cumsum`` with a logistic fit, then labels the figure and
    opens it with ``show()``.
    """
    sns.set(style="darkgrid")

    # All regplot options gathered in one place; a small vertical jitter is
    # applied to the y values of the scatter points.
    regplot_options = {
        "x": "pageviews_cumsum",
        "y": "is_conversion",
        "data": df,
        "logistic": True,
        "n_boot": 500,
        "y_jitter": .01,
        "scatter_kws": {"s": 60},
    }
    sns.regplot(**regplot_options)
    sns.set(font_scale=1.3)

    # NOTE(review): pyplot is reached through seaborn's namespace; this
    # presumably only works on seaborn releases that still expose `sns.plt`
    # -- confirm against the pinned version.
    pyplot = sns.plt
    pyplot.title('Logistic Regression Curve')
    pyplot.ylabel('Conversion probability')
    pyplot.xlabel('Cumulative sum of pageviews')
    pyplot.subplots_adjust(right=0.93, top=0.90, left=0.10, bottom=0.10)
    pyplot.show()


# Run the final program
base.py 文件源码 项目:VAESSL 作者: lovecambi 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def plot_eval(self, eval_dict, labels, path_extension=""):
        """
        Plot the loss function in a overall plot and a zoomed plot.

        The upper subplot shows every curve over all keys of ``eval_dict``;
        the lower subplot shows only the last quarter of the entries, with a
        regression line fitted to each curve. The figure is saved to the path
        returned by ``paths.get_plot_evaluation_path_for_model``.

        :param eval_dict: Mapping whose keys are used as x positions and whose
            values are equally long sequences holding one number per curve
            (presumably epoch -> evaluation values; confirm against caller).
        :param labels: Legend labels, indexed by curve number.
        :param path_extension: If the plot should be saved in an incremental way.
        """

        def plot(x, y, fit, label):
            # x/y are passed by keyword: newer seaborn releases no longer
            # accept them positionally.
            sns.regplot(x=np.array(x), y=np.array(y), fit_reg=fit, label=label, scatter_kws={"s": 5})

        # Materialise keys/values once. On Python 3 dict views can be neither
        # indexed nor sliced, which the plotting below requires.
        keys = list(eval_dict.keys())
        values = list(eval_dict.values())

        plt.clf()
        plt.subplot(211)
        idx = np.array(values[0]).shape[0]
        x = np.array(values)
        for i in range(idx):
            plot(keys, x[:, i], False, labels[i])
        plt.legend()
        plt.subplot(212)
        # Start of the last quarter; with fewer than four entries this is 0,
        # so the zoomed plot simply shows everything.
        tail = -int(len(x) * 0.25)
        for i in range(idx):
            plot(keys[tail:], x[tail:][:, i], True, labels[i])
        plt.xlabel('Epochs')
        plt.savefig(paths.get_plot_evaluation_path_for_model(self.model.get_root_path(), path_extension+".png"))
samestats.py 文件源码 项目:same-stats-different-graphs 作者: jmatejka 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def show_scatter_and_results(df):
    """Creates a plot which shows both the scatter plot and the statistical summary

    The scatter of columns "x" and "y" is drawn on a 12x5 figure, and the
    five statistics returned by ``get_values(df)`` are written as text to the
    right of the axes.

    Args:
        df (pd.DataFrame):  The data set to plot
    """
    plt.figure(figsize=(12, 5))
    # x is passed by keyword: newer seaborn releases treat the first
    # positional argument as `data`, which would clash with data=df.
    sns.regplot(x="x", y="y", data=df, ci=None, fit_reg=False,
                scatter_kws={"s": 50, "alpha": 0.7, "color": "black"})
    plt.xlim(-5, 105)
    plt.ylim(-5, 105)
    plt.tight_layout()

    res = get_values(df)
    fs = 30
    y_off = -5

    labels = ("X Mean", "Y Mean", "X SD", "Y SD", "Corr.")
    max_label_length = max(len(label) for label in labels)

    # If `max_label_length = 10`, this string will be "{:<10}: {:0.9f}", then we
    # can pull the `.format` method for that string to reduce typing it
    # repeatedly
    formatter = '{{:<{pad}}}: {{:0.9f}}'.format(pad=max_label_length).format
    # Same layout, but the correlation is always printed with its sign.
    corr_formatter = '{{:<{pad}}}: {{:+.9f}}'.format(pad=max_label_length).format

    # One (y position, full text) pair per statistic, top to bottom.
    rows = [
        (y_off + 80, formatter(labels[0], res[0])),
        (y_off + 65, formatter(labels[1], res[1])),
        (y_off + 50, formatter(labels[2], res[2])),
        (y_off + 35, formatter(labels[3], res[3])),
        (y_off + 20, corr_formatter(labels[4], res[4])),
    ]

    # Every row is written twice at the same position: first faintly with the
    # last 2 characters cut off, then fully opaque with the last 7 cut off,
    # so the trailing digits of each number appear greyed out.
    for alpha, dropped in ((0.3, 2), (1, 7)):
        for y_pos, text in rows:
            plt.text(110, y_pos, text[:-dropped], fontsize=fs, alpha=alpha)
    plt.tight_layout(rect=[0, 0, 0.57, 1])
data_analysis.py 文件源码 项目:-Python-Analysis_of_wine_quality 作者: ekolik 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def pearson(wine_set):
    """Plot residual sugar against density and print their Pearson correlation.

    A scatter plot with a regression line is shown first; afterwards the
    result of ``scipy.stats.pearsonr`` for the two columns is printed.
    """
    x_col, y_col = "density", "residual_sugar"

    seaborn.regplot(x=x_col, y=y_col, fit_reg=True, data=wine_set)
    plt.xlabel("Density of wine")
    plt.ylabel("Residual sugar in wine, gram")
    plt.title("Association between wine's density and residual sugar")
    plt.show()

    correlation = scipy.stats.pearsonr(wine_set[x_col], wine_set[y_col])
    print(correlation)

# print('----------------Pearson Correlation------------------------')
# call(pearson)


# -----------------------------------------Exploring Statistical Interactions------------------
data_analysis.py 文件源码 项目:-Python-Analysis_of_wine_quality 作者: ekolik 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def explore(wine_set):
    """Compare the density / residual sugar association across quality levels.

    The data is split by the ``quality`` column into `low` (<= 5), `medium`
    (6 or 7) and `high` (> 7). The Pearson correlation of each subset is
    printed first, then one regression plot per subset is shown.
    """
    quality = wine_set['quality']
    subsets = (
        ('low', wine_set[quality <= 5]),
        ('medium', wine_set[(quality == 6) | (quality == 7)]),
        ('high', wine_set[quality > 7]),
    )
    # Heading printed before each correlation, in the same order as `subsets`.
    headings = (
        'association between wine`s density and residual sugar for wines \nof `low` quality',
        '\nof `medium` quality',
        '\nof `high` quality',
    )

    for heading, (_, subset) in zip(headings, subsets):
        print(heading)
        print(scipy.stats.pearsonr(subset['density'], subset["residual_sugar"]))

    for level, subset in subsets:
        seaborn.regplot(x="density", y="residual_sugar", fit_reg=True, data=subset)
        plt.xlabel("Density of wine")
        plt.ylabel("Residual sugar in wine, gram")
        plt.title("Association between wine's density and residual sugar for wines of `{}` quality".format(level))
        plt.show()
plot.py 文件源码 项目:pyem 作者: asarnow 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def plot_angle_comparison(disc, whole, lgdtext=None, fname=None):
    """Scatter three angle columns of *whole* against the same columns of *disc*.

    One panel is drawn per column: "rlnAngleRot", "rlnAngleTilt" and
    "rlnAnglePsi", with the *whole* values on x and the *disc* values on y.

    :param disc: Object indexable by the three column names above
        (presumably a DataFrame of particle angles; confirm against caller).
    :param whole: Same layout as *disc*.
    :param lgdtext: Two strings: the y label of the first panel and the
        shared caption written below the figure. Defaults are used if None.
    :param fname: If not None, the figure is saved to this path at 300 dpi.
    :return: The figure and the array of its three axes.
    """
    if lgdtext is None:
        lgdtext = [u"Disc-Only Angle (deg)", 'Whole TRPV1 Angle (deg)']

    # (column, axis limits, tick spacing, title). The titles are raw strings
    # so the TeX commands reach matplotlib intact; in a normal literal the
    # "\t" of "\theta" would be turned into a TAB character.
    panels = [
        ("rlnAngleRot", (-45, 45), 15, r"$\phi$ ( $Z$ )"),
        ("rlnAngleTilt", (0, 180), 30, r"$\theta$ ( $Y'$ )"),
        ("rlnAnglePsi", (-180, 180), 45, r"$\psi$ ( $Z''$  )"),
    ]

    sns.set(font_scale=3)
    f, ax = plt.subplots(1, 3, figsize=(30, 10))
    for i, (column, limits, step, title) in enumerate(panels):
        axis = ax[i]
        # x/y are passed by keyword: newer seaborn releases no longer accept
        # them positionally.
        sns.regplot(x=whole[column], y=disc[column], fit_reg=False, scatter_kws={"s": 16}, ax=axis)
        axis.set_xlim(limits)
        axis.set_ylim(limits)
        # +1 so that the upper limit itself gets a tick.
        ticks = np.arange(limits[0], limits[1] + 1, step)
        axis.set_xticks(ticks)
        axis.set_yticks(ticks)
        axis.xaxis.label.set_visible(False)
        if i == 0:
            # Only the left-most panel carries a y label.
            axis.set_ylabel(lgdtext[0])
        else:
            axis.yaxis.label.set_visible(False)
        axis.set_title(title, y=1.01)

    # Shared x caption, placed below the axes.
    f.text(0.5, -0.05, lgdtext[1], ha='center', fontsize=36)
    f.tight_layout(pad=1., w_pad=-1.5, h_pad=0.5)
    if fname is not None:
        f.savefig(fname, dpi=300)
    return f, ax
regression.py 文件源码 项目:astetik 作者: mikkokotila 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def regression(data, x, y, xscale='linear', yscale='linear'):
    """Draw a seaborn regression plot of *y* against *x* from *data*.

    :param data: Data set handed to ``sns.regplot``.
    :param x: Name (or values) of the x variable.
    :param y: Name (or values) of the y variable.
    :param xscale: Scale applied to the x-axis.
    :param yscale: Scale applied to the y-axis.
    """
    sns.set_context("notebook", font_scale=.8, rc={"lines.linewidth": 0})
    sns.set_style('white')

    axes = sns.regplot(x=x, y=y, data=data)
    plt.tick_params(axis='both', which='major', pad=10)

    # Apply the requested axis scales, x first and then y.
    for scale_option in ({"xscale": xscale}, {"yscale": yscale}):
        axes.set(**scale_option)

    sns.despine()
cohort.py 文件源码 项目:cohorts 作者: hammerlab 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def plot_correlation(self, on, x_col=None, plot_type="jointplot", stat_func=pearsonr, show_stat_func=True, plot_kwargs=None, **kwargs):
        """Plot the correlation between two variables.

        Parameters
        ----------
        on : list or dict of functions or strings
            See `cohort.load.as_dataframe`
        x_col : str, optional
            If `on` is a dict, this guarantees we have the expected ordering.
        plot_type : str, optional
            Specify "jointplot", "regplot", "boxplot", or "barplot".
        stat_func : function, optional.
            Specify which function to use for the statistical test.
        show_stat_func : bool, optional
            Whether or not to show the stat_func result in the plot itself.
        plot_kwargs : dict, optional
            kwargs to pass through to plotting functions.
        **kwargs
            Passed through to `self.as_dataframe`.

        Returns
        -------
        CorrelationResults
            Holds the two values returned by `stat_func`, the two series that
            were compared, and the object returned by the plotting function.

        Raises
        ------
        ValueError
            If `plot_type` is not one of the supported values, or if `on`
            does not resolve to exactly two columns.
        """
        # A fresh dict per call instead of a shared mutable default argument.
        if plot_kwargs is None:
            plot_kwargs = {}
        if plot_type not in ["boxplot", "barplot", "jointplot", "regplot"]:
            raise ValueError("Invalid plot_type %s" % plot_type)
        plot_cols, df = self.as_dataframe(on, return_cols=True, **kwargs)
        if len(plot_cols) != 2:
            raise ValueError("Must be comparing two columns, but there are %d columns" % len(plot_cols))
        for plot_col in plot_cols:
            df = filter_not_null(df, plot_col)

        # x defaults to the first column; y is whichever column is left.
        if x_col is None:
            x_col = plot_cols[0]
        y_col = plot_cols[1] if x_col == plot_cols[0] else plot_cols[0]

        series_x = df[x_col]
        series_y = df[y_col]
        coeff, p_value = stat_func(series_x, series_y)
        if plot_type == "jointplot":
            plot = sb.jointplot(data=df, x=x_col, y=y_col,
                                stat_func=stat_func if show_stat_func else None,
                                **plot_kwargs)
        elif plot_type == "regplot":
            plot = sb.regplot(data=df, x=x_col, y=y_col,
                              **plot_kwargs)
        elif plot_type == "boxplot":
            plot = stripboxplot(data=df, x=x_col, y=y_col, **plot_kwargs)
        else:
            plot = sb.barplot(data=df, x=x_col, y=y_col, **plot_kwargs)
        return CorrelationResults(coeff=coeff, p_value=p_value, stat_func=stat_func,
                                  series_x=series_x, series_y=series_y, plot=plot)
yes_no.py 文件源码 项目:guesswhat 作者: GuessWhatGame 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def __init__(self, path, games, logger, suffix):
        """Plot the mean answer value per question position.

        Each answer of every game whose status is not "incomplete" is encoded
        as 1 ("yes"), 0 ("no") or 0.5 (anything else). Games are grouped by
        their number of answers, and for every group below the cut-off one
        lowess-smoothed curve of the mean encoding per position is drawn.

        :param path: Forwarded to the parent constructor.
        :param games: Iterable of games exposing ``status`` and ``answers``.
        :param logger: Not used by this constructor.
        :param suffix: Forwarded to the parent constructor.
        """
        super(YesNo, self).__init__(path, self.__class__.__name__, suffix)

        # Encoded answer lists, keyed by the number of answers in the game.
        yes_no = collections.defaultdict(list)

        MAX = 15
        answer_codes = {"yes": 1, "no": 0}

        for game in games:

            if game.status == "incomplete":
                continue

            # Anything that is neither "yes" nor "no" counts as 0.5.
            yn = [answer_codes.get(a.lower(), 0.5) for a in game.answers]
            yes_no[len(game.answers)].append(yn)

        sns.set(style="whitegrid")
        max_no_question = min(MAX, max(yes_no.keys())) + 1

        for key, yn in yes_no.items():

            no_question = int(key)
            if no_question < max_no_question:
                yn_mean = np.array(yn).mean(axis=0)
                sns.regplot(x=np.arange(1, no_question + 1, 1), y=yn_mean, lowess=True, scatter=False)

        # Dummy legend entry. regplot returns the axes it drew on, so taking
        # the handle from this call keeps the styling below working even when
        # no group qualified for a curve above.
        ax = sns.regplot(x=np.array([-1]), y=np.array([-1]), scatter=False, line_kws={'linestyle':'-'}, label="Ratio yes-no",ci=None, color="g")
        ax.legend(loc="best", fontsize='x-large')

        ax.set_xlim(1, max_no_question)
        ax.set_ylim(0.1, 1)
        ax.set_xlabel("Number of questions", {'size': '14'})
        ax.set_ylabel('Ratio yes-no', {'size': '14'})
question_object.py 文件源码 项目:guesswhat 作者: GuessWhatGame 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def __init__(self, path, games, logger, suffix):
        """Plot the number of questions per game against its number of objects.

        Draws a binned 4th-order regression of the observed
        (objects, questions) pairs with a band of one standard deviation
        around the per-object-count mean, plus two reference curves,
        ``y = log2(x)`` and ``y = x``.

        :param path: Forwarded to the parent constructor.
        :param games: Iterable of games exposing ``objects`` and ``questions``.
        :param logger: Not used by this constructor.
        :param suffix: Forwarded to the parent constructor.
        """
        super(QuestionVsObject, self).__init__(path, self.__class__.__name__, suffix)

        # One [number of objects, number of questions] row per game.
        ratio_q_object = np.array([[len(game.objects), len(game.questions)] for game in games])

        sns.set(style="white")

        x = np.linspace(3, 20, 80)

        # Question counts grouped by the number of objects in the game.
        counter = collections.defaultdict(list)
        for k, val in ratio_q_object:
            counter[k].append(val)

        # Rows: object count, mean, std, half-width of the 95% interval.
        # Columns are indexed by object count; at least 21 as before, but
        # widened when a game has more than 20 objects so that the indexing
        # below cannot run out of bounds.
        n_columns = max(21, max(counter) + 1) if counter else 21
        arr = np.zeros([4, n_columns])
        for k, val in counter.items():
            arr[0, k] = k
            arr[1, k] = np.mean(val)

            # Std
            arr[2, k] = np.std(val)

            # confidence interval 95% (normal approximation, z = 1.96);
            # stored for completeness, the plot below only uses the std row
            arr[3, k] = 1.96*np.std(val)/np.sqrt(len(val))

        sns.regplot(x=ratio_q_object[:, 0], y=ratio_q_object[:, 1], x_ci=None, x_bins=20, order=4,  label="Human behavior", marker="o", line_kws={'linestyle':'-'})
        plt.fill_between(x=arr[0,:], y1=arr[1,:]-arr[2,:], y2=arr[1,:]+arr[2,:], alpha=0.2)

        # Reference curves, drawn as dashed regression lines without points.
        sns.regplot    (x=x, y=np.log2(x), order=6, scatter=False, label="y = log2(x)", line_kws={'linestyle':'--'})
        f = sns.regplot(x=x, y=x         , order=1, scatter=False, label="y = x"      , line_kws={'linestyle':'--'})

        f.legend(loc="best", fontsize='x-large')
        f.set_xlim(3,20)
        f.set_ylim(0,20)
        f.set_xlabel("Number of objects", {'size':'14'})
        f.set_ylabel("Number of questions", {'size':'14'})
plots.py 文件源码 项目:Comparative-Annotation-Toolkit 作者: ComparativeGenomicsToolkit 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def improvement_plot(consensus_data, ordered_genomes, improvement_tgt):
    """Write one page of AUGUSTUS-vs-transMap comparison plots per genome.

    For every genome with a non-empty 'changes' table, a 2x2 figure is drawn
    in which each panel overlays a KDE and a scatter of a transMap metric (x)
    against the corresponding metric of the chosen transcript (y). The pages
    are collected in a PDF written to *improvement_tgt*.

    :param consensus_data: Mapping genome -> data; the entry
        ``['Evaluation Improvement']`` must provide 'changes' (rows of eight
        values, in the column order assigned below) and 'unchanged'.
    :param ordered_genomes: Genomes to plot, in page order.
    :param improvement_tgt: Target object whose ``open('w')`` yields the
        output file handle.
    """
    def do_kdeplot(x, y, ax, n_levels=None, bw='scott'):
        # Best effort: a failed KDE fit must not abort the report. Only
        # ordinary errors are swallowed, so Ctrl-C and interpreter exit
        # still propagate.
        try:
            sns.kdeplot(x, y, ax=ax, cut=0, cmap='Purples_d', shade=True, shade_lowest=False, n_levels=n_levels, bw=bw,
                        rasterized=True)
        except Exception:
            logger.warning('Unable to do a KDE fit to AUGUSTUS improvement.')

    # One entry per panel: (x column, y column, KDE n_levels, KDE bandwidth).
    panels = [
        ('transMap original introns', 'Original introns', 25, 2),
        ('transMap intron annotation support', 'Intron annotation support', 25, 2),
        ('transMap intron RNA support', 'Intron RNA support', 25, 2),
        ('transMap alignment goodness', 'Alignment goodness', 20, 1),
    ]

    with improvement_tgt.open('w') as outf, PdfPages(outf) as pdf, sns.axes_style("whitegrid"):
        for genome in ordered_genomes:
            data = pd.DataFrame(consensus_data[genome]['Evaluation Improvement']['changes'])
            unchanged = consensus_data[genome]['Evaluation Improvement']['unchanged']
            if len(data) == 0:
                continue
            data.columns = ['transMap original introns',
                            'transMap intron annotation support',
                            'transMap intron RNA support',
                            'Original introns',
                            'Intron annotation support',
                            'Intron RNA support',
                            'transMap alignment goodness',
                            'Alignment goodness']
            fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2, nrows=2)
            axes = [ax1, ax2, ax3, ax4]
            for ax in [ax1, ax2, ax3]:  # goodness plots are allowed to auto-set scale
                ax.set_xlim(0, 100)
                ax.set_ylim(0, 100)
            goodness_min = min(data['Alignment goodness'])
            ax4.set_xlim(goodness_min, 100)
            ax4.set_ylim(goodness_min, 100)
            for ax, (x_col, y_col, n_levels, bw) in zip(axes, panels):
                do_kdeplot(data[x_col], data[y_col], ax, n_levels=n_levels, bw=bw)
                sns.regplot(x=data[x_col], y=data[y_col], ax=ax,
                            color='#A9B36F', scatter_kws={"s": 3, 'alpha': 0.7, 'rasterized': True}, fit_reg=False)
            fig.suptitle('AUGUSTUS metric improvements for {:,} transcripts in {}.\n'
                         '{:,} transMap transcripts were chosen.'.format(len(data), genome, unchanged))
            for ax in axes:
                # 'box' instead of 'box-forced': the latter was removed from
                # matplotlib, and these axes are not shared, so 'box' behaves
                # the same here.
                ax.set(adjustable='box', aspect='equal')
            fig.subplots_adjust(hspace=0.3)
            multipage_close(pdf, tight_layout=False)


问题


面经


文章

微信
公众号

扫码关注公众号