python类boxplot()的实例源码

metrics_acdc.py 文件源码 项目:acdc_segmenter 作者: baumgach 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def boxplot_metrics(df, eval_dir):
    """
    Draw one boxplot panel per geometric measure and save them as one figure.

    Three vertically stacked panels are produced from the columns 'dice',
    'hd' and 'assd' of ``df``; each panel groups by 'struc' on the x-axis
    and splits the boxes by 'phase'.

    :param df: data frame handed to seaborn; it is read through the columns
               'struc', 'phase', 'dice', 'hd' and 'assd'
    :param eval_dir: directory in which 'boxplots.eps' is written
    :return: always 0
    """
    fig, axes = plt.subplots(3, 1)
    fig.set_figheight(14)
    fig.set_figwidth(7)

    # One panel per measure, top to bottom.
    for panel, measure in zip(axes, ('dice', 'hd', 'assd')):
        sns.boxplot(x='struc', y=measure, hue='phase', data=df,
                    palette="PRGn", ax=panel)

    plt.savefig(os.path.join(eval_dir, 'boxplots.eps'))
    plt.close()

    return 0
plot_errors_boxplot.py 文件源码 项目:MDI 作者: rafaelvalle 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def plot(params_dir):
    """
    Box-plot the best validation objective of every model found in a directory.

    Each sub-directory of ``params_dir`` is treated as one model. All '*.h5'
    files inside it are loaded and the value stored under
    ['best_epoch']['validate_objective'] is collected. The collected values
    are written to a CSV file named after the last component of
    ``params_dir`` (in the current working directory) and drawn as one box
    per model; the figure is saved below IMAGES_DIRECTORY.

    :param params_dir: directory that contains one sub-directory per model
    """
    model_dirs = [name for name in os.listdir(params_dir)
                  if os.path.isdir(os.path.join(params_dir, name))]

    errors = defaultdict(list)
    for model_dir in model_dirs:
        # Strip the shared suffix so the tick labels stay short.
        label = re.sub('_bin_scaled_mono_True_ratio', '', model_dir)
        pattern = os.path.join(params_dir, model_dir) + '/*.h5'
        errors[label] = [
            dd.io.load(path)['best_epoch']['validate_objective']
            for path in glob.glob(pattern)]

    # Wrapping each list in a Series lets models with different numbers of
    # runs share one frame (shorter columns are padded with NaN).
    # .items() works on Python 2 and 3, unlike the former .iteritems().
    df = pd.DataFrame({k: pd.Series(v) for k, v in errors.items()})

    run_name = os.path.basename(os.path.normpath(params_dir))
    df.to_csv(run_name)

    plt.figure(figsize=(16, 4), dpi=300)
    # Wide-form input has to be passed as ``data``.
    g = sns.boxplot(data=df)
    g.set_xticklabels(df.columns, rotation=45)
    plt.tight_layout()
    plt.savefig('{}_errors_box_plot.png'.format(
        os.path.join(IMAGES_DIRECTORY, run_name)))
dab_deconv_area.py 文件源码 项目:DAB_analyzer 作者: meklon 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def plot_group(data_frame, path_output):
    """
    Save a box plot of the "DAB+ area, %" column grouped by "Group".

    The image is written to ``<path_output>/summary_statistics.png``.

    :param data_frame: data passed to seaborn; read through the columns
                       "Group" and "DAB+ area, %"
    :param path_output: directory that receives the image file
    """
    # Imported here rather than at module level: seaborn is an optional import.
    import seaborn as sns

    sns.set_style("whitegrid")
    sns.set_context("talk")

    plt.figure(num=None, figsize=(15, 7), dpi=120)
    plt.ylim(0, 100)
    plt.title('Box plot')
    sns.boxplot(x="Group", y="DAB+ area, %", data=data_frame)
    plt.tight_layout()

    plt.savefig(os.path.join(path_output, "summary_statistics.png"), dpi=300)
stats.py 文件源码 项目:temci 作者: parttimenerd 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def boxplot(self, fig_width: Number, fig_height: Number = None):
        """
        Draw a horizontal box plot from the data frame of this object.

        :param fig_width: width of the figure in cm
        :param fig_height: height of the figure in cm; when None it is derived
                           from ``fig_width`` via ``_height_for_width``
        """
        import seaborn as sns
        import matplotlib.pyplot as plt

        self.reset_plt()
        height = self._height_for_width(fig_width) if fig_height is None else fig_height
        size_in_inch = self._fig_size_cm_to_inch(fig_width, height)
        self._fig = plt.figure(figsize=size_in_inch)
        sns.boxplot(data=self.get_data_frame(), orient="h")
plot_journal.py 文件源码 项目:nmt-repr-analysis 作者: boknilev 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def plot_averages(df, figname, fignum, use_en_source=True, num_accs=3):
    """
    Box-plot accuracies per layer and save the figure.

    :param df: data frame with 'source' and 'target' columns; the selected
               rows are handed to ``get_accs_from_df`` with col_pref='acc'
    :param figname: path the figure is saved to
    :param fignum: matplotlib figure number to draw into
    :param use_en_source: True keeps rows with source 'en' and a non-'en'
                          target, False keeps the opposite direction
    :param num_accs: repetition count used when building the layer labels
    :return: ``fignum + 1``
    """
    plt.figure(fignum)

    if use_en_source:
        rows = (df.source == 'en') & (df.target != 'en')
        df_side = df[rows]
        # 0,0,0,0,0,1,1,... : every layer index five times in a row
        layers = np.concatenate([[layer] * 5 for layer in range(5)] * num_accs)
    else:
        rows = (df.source != 'en') & (df.target == 'en')
        df_side = df[rows]
        # 0,1,2,3,4,0,1,... : the layer indices cycled
        layers = list(range(5)) * 5 * num_accs

    accs = get_accs_from_df(df_side, col_pref='acc')
    df_plot = pd.DataFrame({'Layer': layers,
                            'Accuracy': np.concatenate(accs)})
    sns.boxplot(x='Layer', y='Accuracy', data=df_plot)

    plt.tight_layout()
    plt.savefig(figname)
    return fignum + 1
plot_journal.py 文件源码 项目:nmt-repr-analysis 作者: boknilev 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def plot_averages_by_type(df, figname, fignum, use_en_source=True, pointplot=True, layer0=True):
    """
    Plot accuracy per relation, coloured by layer, and save the figure.

    :param df: data frame read through the columns 'source', 'target',
               'layer', 'accuracy' and 'relation'
    :param figname: path the figure is saved to
    :param fignum: matplotlib figure number to draw into
    :param use_en_source: True keeps rows with source 'en' and a non-'en'
                          target, False keeps the opposite direction
    :param pointplot: draw a seaborn point plot (without joining lines) when
                      True, a box plot otherwise
    :param layer0: when False, rows whose layer equals the string '0' are dropped
    :return: ``fignum + 1``
    """
    plt.figure(fignum)

    if use_en_source:
        selection = (df.source == 'en') & (df.target != 'en')
    else:
        selection = (df.source != 'en') & (df.target == 'en')
    df_side = df[selection]
    if not layer0:
        df_side = df_side[df_side.layer != '0']

    shared = dict(x='accuracy', y='relation', hue='layer', data=df_side)
    if pointplot:
        sns.pointplot(join=False, **shared)
    else:
        sns.boxplot(**shared)

    plt.xlabel('Accuracy')
    plt.ylabel('')

    plt.tight_layout()
    plt.savefig(figname)
    return fignum + 1
plotting.py 文件源码 项目:PythonPackages 作者: wanhanwan 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def cross_section_cndl(data, factor_name):
    '''Draw one box per date for a single factor column.

    Parameters
    ------------------------------
    data:DataFrame(index:[Date,IDs],factor1,factor2,...)
        the index is reset so that 'Date' becomes a regular column

    factor_name:str
        column of ``data`` plotted on the y-axis

    Returns
    ------------------------------
    whatever ``sns.boxplot`` returns for the plot
    '''
    sns.set(style='ticks')
    flat = data.reset_index()

    axes = sns.boxplot(x='Date', y=factor_name, data=flat, palette='PRGn')
    sns.despine(offset=10, trim=True)
    return axes

# ??2
# ?????, ?????????????
plot_utils.py 文件源码 项目:saapy 作者: ashapochka 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def plot_author_contributions(commit_frame):
    """
    Show a vertical box plot of 'stats_total_lines' per 'author'.

    :param commit_frame: data passed to seaborn; read through the columns
                         'author' and 'stats_total_lines'
    """
    sns.boxplot(data=commit_frame, x='author', y='stats_total_lines', orient='v')

    # Titles and axis labels; author names are rotated to limit overlap.
    plt.title('Code Contributions by Authors')
    plt.xlabel('Author')
    plt.ylabel('Total Lines Committed')
    plt.xticks(rotation=70)

    plt.show()
boxplot.py 文件源码 项目:coquery 作者: gkunter 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def plot_fnc(self, *args, **kwargs):
        """Forward all positional and keyword arguments unchanged to ``sns.boxplot``."""
        sns.boxplot(*args, **kwargs)
stats.py 文件源码 项目:temci 作者: parttimenerd 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def whiskers(self, whis: float = 1.5) -> t.Tuple[float, float]:
        """
        Calculates the lower and the upper whisker for a boxplot.
        I.e. the minimum and the maximum value of the data set
        that lie in the range [Q1 - whis * IQR, Q3 + whis * IQR].
        IQR being the interquartile distance, Q1 the lower and Q3 the upper quartile.

        If no observation qualifies on one side, the whisker collapses onto
        the corresponding quartile.

        Adapted from http://stackoverflow.com/a/20096945

        :param whis: multiple of the IQR that the fences extend beyond the quartiles
        :return: tuple (lower whisker, upper whisker)
        """
        q1, _, q3 = self.quartiles()
        iqr = self.iqr()

        # get high extreme: the fence sits above the *upper* quartile
        hi_val = q3 + whis * iqr
        whisk_hi = np.compress(self.array <= hi_val, self.array)
        if len(whisk_hi) == 0 or np.max(whisk_hi) < q3:
            whisk_hi = q3
        else:
            whisk_hi = np.max(whisk_hi)

        # get low extreme
        lo_val = q1 - whis * iqr
        whisk_lo = np.compress(self.array >= lo_val, self.array)
        if len(whisk_lo) == 0 or np.min(whisk_lo) > q1:
            whisk_lo = q1
        else:
            whisk_lo = np.min(whisk_lo)
        return whisk_lo, whisk_hi
plot_journal.py 文件源码 项目:nmt-repr-analysis 作者: boknilev 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def plot_averages_by_distance(df, figname, fignum, use_en_source=True, num_accs=24, pointplot=True, hue='Distance'):
    """
    Plot accuracies by layer and distance bucket and save the figure.

    :param df: data frame with 'source' and 'target' columns; the selected
               rows are handed to ``get_accs_from_df`` with col_pref='dist'
    :param figname: path the figure is saved to
    :param fignum: matplotlib figure number to draw into
    :param use_en_source: True keeps rows with source 'en' and a non-'en'
                          target, False keeps the opposite direction
    :param num_accs: repetition count used when building the layer labels
    :param pointplot: draw a seaborn point plot when True, a box plot otherwise
    :param hue: 'Distance' puts layers on the x-axis and colours by distance;
                any other value swaps the two roles
    :return: ``fignum + 1``
    """
    plt.figure(fignum)

    if use_en_source:
        rows = (df.source == 'en') & (df.target != 'en')
        df_side = df[rows]
        layers = np.concatenate([[layer] * 5 for layer in range(5)] * num_accs)
    else:
        rows = (df.source != 'en') & (df.target == 'en')
        df_side = df[rows]
        layers = list(range(5)) * 5 * num_accs

    accs = get_accs_from_df(df_side, col_pref='dist')
    df_plot = pd.DataFrame({
        'Layer': layers,
        'Accuracy': np.concatenate(accs),
        # each of the 8 distance names is repeated 75 times in a row
        'Distance': np.concatenate(
            [[pretty_dist_names_list[k]] * 75 for k in range(8)]),
    })

    draw = sns.pointplot if pointplot else sns.boxplot
    if hue == 'Distance':
        draw(x='Layer', y='Accuracy', data=df_plot, hue='Distance')
    else:
        draw(x='Distance', y='Accuracy', data=df_plot, hue='Layer')
        plt.xticks(range(8), pretty_dist_names_list)

    plt.tight_layout()
    plt.savefig(figname)
    return fignum + 1
main.py 文件源码 项目:xplore 作者: fahd09 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def explore_feature_variation(self, col=None, use_target=False, **kwargs):
        '''
        Plot each requested column on its own, or against the target.

        Without the target, numeric columns get a seaborn distplot and
        categorical columns get a horizontal bar chart of their value counts
        (only the 20 most frequent values when the column has more than 30
        distinct ones). With the target, every column is delegated to
        ``explore_features_covariation`` together with ``self.y``.

        Parameters
        ----------
        col : a string of a column name, or a list of many columns names or
                None (default). If col is None, all columns will be used.
        use_target : bool, default False
            Whether to use the target column in the plots.
        **kwargs: additional arguments to be passed to seaborn's distplot or
            to pandas's plotting utilities.

        Raises
        ------
        TypeError
            if a column is neither numeric nor categorical
        '''
        self._validate_params(params_list={'col': col},
                              expected_types={'col': [str, list, type(None)]})

        # Normalise ``col`` to a list of column names.
        if type(col) is str:
            col = [col]
        if col is None:
            col = self._get_all_features()

        if use_target == False:
            for column in col:
                if self.is_numeric(self.df[column]) == True:
                    plt.figure(column)
                    sns.distplot(self.df[column], color="m", **kwargs)
                elif self.is_categorical(self.df[column]) == True:
                    plt.figure(column)
                    counts = self.df[column].value_counts()
                    if len(self.df[column].unique()) > 30:
                        # too many levels to show: keep the 20 most frequent
                        counts = counts[:20]
                    # reversed so the most frequent value ends up on top
                    counts[::-1].plot.barh(**kwargs)
                else:
                    raise TypeError('TYPE IS NOT SUPPORTED')
                plt.title(column)
                plt.tight_layout()
        else:  # use target variable
            for column in col:
                self.explore_features_covariation(col1=column, col2=self.y, **kwargs)
summary_stats.py 文件源码 项目:fake_news 作者: bmassman 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def calculate_word_count_stats(articles: pd.DataFrame):
    """Print per-source word count statistics and plot the ten largest sources.

    Count, mean and standard deviation of 'word_count' are computed per
    'base_url', sorted by count (descending) and printed with ``print_full``.
    A bar plot of the article counts and a box plot of the word counts are
    then shown for the ten sources with the most articles.
    """
    # NOTE(review): ``sns.plt`` is only exposed by older seaborn releases --
    # confirm the pinned seaborn version before upgrading.
    grouped = articles.groupby(['base_url'])['word_count']
    by_source = grouped.agg(['count', 'mean', 'std'])
    by_source.sort_values('count', ascending=False, inplace=True)
    print_full(by_source)

    top_sources = by_source.head(10).index
    in_top = by_source.index.isin(top_sources)
    top_counts = by_source.reset_index()[in_top]
    sns.barplot(x='base_url', y='count', data=top_counts)
    sns.plt.show()

    top_articles = articles[articles['base_url'].isin(top_sources)]
    sns.boxplot(x='base_url', y='word_count', data=top_articles)
    sns.plt.show()
summary_stats.py 文件源码 项目:fake_news 作者: bmassman 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def show_articles_by_source(articles: pd.DataFrame):
    """Show a box plot of per-source article counts for each label.

    Articles are counted per ('base_url', 'labels') pair; only pairs with
    more than 100 articles enter the plot.
    """
    counts = articles.groupby(['base_url', 'labels']).size()
    counts = counts.reset_index(name='count')
    frequent = counts[counts['count'] > 100]
    sns.boxplot(x='labels', y='count', data=frequent)
    sns.plt.show()
jmultidk_notf.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def plot_box( self, fname_out = None):
        """
        Box-plot the 'r2' column of ``self.df_best_expand`` per 'Method'.

        The figure is saved to ``fname_out`` when given. Otherwise, if
        ``self.fname`` is set, the target is ``self.fname`` with its last four
        characters replaced by '_box.eps'. With neither, nothing is saved.
        """
        sns.boxplot(x="Method", y="r2", data=self.df_best_expand, palette="PRGn")
        sns.despine(offset=10, trim=True)
        plt.ylabel( r"$r^2$")
        plt.xlabel( "Methods")

        if fname_out is None:
            if self.fname is None:
                # no explicit and no default target: leave the figure unsaved
                return
            fname_out = self.fname[:-4] + '_box.eps'
            print( 'Default: the figure of self.df_best_expand is saved to', fname_out)
        plt.savefig( fname_out)
jmultidk.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def plot_box( self, fname_out = None):
        """
        Box-plot the 'r2' column of ``self.df_best_expand`` per 'Method'.

        The figure is saved to ``fname_out`` when given. Otherwise, if
        ``self.fname`` is set, the target is ``self.fname`` with its last four
        characters replaced by '_box.eps'. With neither, nothing is saved.
        """
        sns.boxplot(x="Method", y="r2", data=self.df_best_expand, palette="PRGn")
        sns.despine(offset=10, trim=True)
        plt.ylabel( r"$r^2$")
        plt.xlabel( "Methods")

        target = fname_out
        if target is None and self.fname is not None:
            target = self.fname[:-4] + '_box.eps'
            print( 'Default: the figure of self.df_best_expand is saved to', target)
        if target is not None:
            plt.savefig( target)
jseaborn.py 文件源码 项目:jamespy_py3 作者: jskDr 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def boxplot_expension( pdr, method_l, x="Group", y="RP", hue="Method"):
    """
    Expand ``pdr`` with ``expension_4_boxplot`` and box-plot the result.

    pdr: input frame handed to ``expension_4_boxplot``
    method_l: list of method names handed to ``expension_4_boxplot``,
        e.g. ['No_Regression', 'Mean_Compensation', 'Linear', 'Exp']
    x, y, hue: column names used for the x-axis, the y-axis and the colour
        grouping of the box plot
    """
    pdw = expension_4_boxplot( pdr, method_l, x=x, y=y, hue=hue)
    # Use the caller's column names instead of the former hard-coded
    # "Group"/"Method", so non-default x/hue are honoured.
    # NOTE(review): assumes expension_4_boxplot names its output columns
    # after x, y and hue -- confirm against the helper.
    sns.boxplot(x=x, y=y, hue=hue, data=pdw, palette="PRGn")
    sns.despine(offset=10, trim=True)
explore.py 文件源码 项目:deep-learning-experiments 作者: raghakot 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def plot_multi(names, models, angles, runs=1000):
    """
    Box-plot the per-model 'matched' results over random test samples.

    Up to ``runs`` indices are drawn at random (without replacement) from
    ``X_test``; ``compare`` is called for each one and its second return
    value is collected. The columns (one per model) are ordered by
    ascending mean before plotting.

    :param names: display name per model, indexed like the columns of the
                  collected 'matched' values
    :param models: passed through to ``compare``
    :param angles: passed through to ``compare``
    :param runs: maximum number of test samples to evaluate
    """
    # Local import keeps this block self-contained.
    from collections import OrderedDict

    indices = np.random.permutation(len(X_test))[:runs]

    matched_all = []
    for i, idx in enumerate(indices):
        print("Processing {}/{}".format(i, len(indices)))
        _, matched = compare(idx, angles, models)
        matched_all.append(matched)

    matched_all = np.array(matched_all)
    order = np.argsort(np.mean(matched_all, axis=0))
    # DataFrame.from_items was removed from pandas; an OrderedDict keeps the
    # sorted column order on every supported Python version.
    df = pd.DataFrame(
        OrderedDict((names[i], matched_all[:, i]) for i in order))
    sb.boxplot(data=df)
    plt.show()
plot.py 文件源码 项目:cohorts 作者: hammerlab 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def stripboxplot(x, y, data, ax=None, significant=None, **kwargs):
    """
    Overlay a stripplot on top of a boxplot.

    :param x: column of ``data`` for the x-axis
    :param y: column of ``data`` for the y-axis
    :param data: data frame passed to both seaborn calls
    :param ax: axes to draw into; passed to ``sb.boxplot`` whose return value
               is then used for the stripplot
    :param significant: when not None, forwarded to
                        ``add_significance_indicator``
    :param kwargs: extra keyword arguments shared by both plots. ``jitter``
                   (default 0.05) is applied to the stripplot only;
                   ``color`` is applied to both plots when given and
                   defaults to "0.3" for the strips alone.
    :return: the object returned by ``sb.stripplot``
    """
    # ``jitter`` is a stripplot-only option. It must be removed before the
    # shared kwargs reach boxplot, which would otherwise forward it to
    # matplotlib and fail.
    jitter = kwargs.pop("jitter", 0.05)

    ax = sb.boxplot(
        x=x,
        y=y,
        data=data,
        ax=ax,
        fliersize=0,  # outliers are already visible as strip points
        **kwargs
    )

    # A caller-supplied colour has been applied to the boxes above; take it
    # out of kwargs now so it is not passed to stripplot twice.
    color = kwargs.pop("color", "0.3")

    plot = sb.stripplot(
        x=x,
        y=y,
        data=data,
        ax=ax,
        jitter=jitter,
        color=color,
        **kwargs
    )

    if data[y].min() >= 0:
        hide_negative_y_ticks(plot)
    if significant is not None:
        add_significance_indicator(plot=plot, significant=significant)

    return plot
visualization.py 文件源码 项目:Default-Credit-Card-Prediction 作者: AlexPnt 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def visualize_feature_boxplot(X,y,selected_feature,features):
    """
    Save a boxplot of one feature, split by the 'Y' column.

    Keyword arguments:
    X -- The feature vectors
    y -- The target vector
    selected_feature -- Name of the column plotted on the y-axis
    features -- Column names for X and y stacked side by side; the plot
                reads the columns 'Y' and selected_feature from them
    """
    # feature matrix and target side by side, one row per sample
    df=pd.DataFrame(data=np.column_stack((X,y)),columns=features)

    sea.boxplot(data=df,x='Y',y=selected_feature,hue="Y",palette="husl")
    plt.title('BoxPlot Distribution of '+selected_feature)

    # the figure goes to img/<selected_feature>_boxplot.png via save_fig
    output_dir = "img"
    target = '{}/{}_boxplot.png'.format(output_dir,selected_feature)
    save_fig(output_dir,target)
analysis.py 文件源码 项目:crop-seq 作者: epigen 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def _rotate_heatmap_ticklabels(grid):
    """Set the clustermap's y tick labels horizontal and x tick labels vertical."""
    for item in grid.ax_heatmap.get_yticklabels():
        item.set_rotation(0)
    for item in grid.ax_heatmap.get_xticklabels():
        item.set_rotation(90)


def inspect_bulk(df, df_bulk, de_genes, de_genes_bulk):
    """
    Write QC plots for the bulk expression matrix to results/bulk/.

    For the matrix in ``df_bulk`` (labelled "bitseq") this saves a box plot
    of the melted expression values and, for each of the two gene sets, a
    z-scored clustermap of the selected rows plus a clustermap of their
    column-wise correlation.

    :param df: not used by this function
    :param df_bulk: bulk expression matrix; rows are selected by gene label
    :param de_genes: gene labels of the first signature ("de_genes")
    :param de_genes_bulk: gene labels of the second signature ("de_genes_bulk")
    """
    quant_types = [("bitseq", df_bulk)]
    # Explicit name -> gene list pairs; replaces eval() on the loop variable,
    # the names are still needed for the output file names.
    genesets = [("de_genes", de_genes), ("de_genes_bulk", de_genes_bulk)]

    for quant_type, exp_matrix in quant_types:
        print(quant_type)

        # Boxplots of expression
        fig, axis = plt.subplots(1)
        sns.boxplot(data=pd.melt(exp_matrix), x="grna", y="value", hue="condition", ax=axis)
        fig.savefig(os.path.join("results", "bulk", "bulk_samples.qc.{}.expression_boxplots.png".format(quant_type)), dpi=300, bbox_inches="tight")

        # Heatmap and correlation on signature genes
        for geneset, genes in genesets:
            # NOTE(review): ``.ix`` is gone from pandas >= 1.0; switch to
            # ``.loc``/``.reindex`` once the handling of missing gene labels
            # is confirmed.
            g = sns.clustermap(
                exp_matrix.ix[genes].dropna(),
                z_score=0,
                row_cluster=True, col_cluster=True,
                xticklabels=True, yticklabels=True,
                figsize=(15, 15))
            _rotate_heatmap_ticklabels(g)
            g.fig.savefig(os.path.join("results", "bulk", "bulk_samples.qc.{}.{}.png".format(quant_type, geneset)), dpi=300, bbox_inches="tight")

            g = sns.clustermap(
                exp_matrix.ix[genes].dropna().corr(),
                row_cluster=True, col_cluster=True,
                xticklabels=True, yticklabels=True,
                figsize=(15, 15))
            _rotate_heatmap_ticklabels(g)
            g.fig.savefig(os.path.join("results", "bulk", "bulk_samples.qc.{}.{}.correlation.png".format(quant_type, geneset)), dpi=300, bbox_inches="tight")
plots.py 文件源码 项目:AlphaPy 作者: ScottFreeLLC 项目源码 文件源码 阅读 43 收藏 0 点赞 0 评论 0
def plot_box(df, x, y, hue, tag='eda', directory=None):
    r"""Draw a seaborn box plot and hand the figure to ``write_plot``.

    Parameters
    ----------
    df : pandas.DataFrame
        The dataframe containing the ``x`` and ``y`` features.
    x : str
        Variable name in ``df`` to display along the x-axis.
    y : str
        Variable name in ``df`` to display along the y-axis.
    hue : str
        Variable name to be used as hue, i.e., another data dimension.
    tag : str
        Unique identifier for the plot.
    directory : str, optional
        The full specification of the plot location.

    Returns
    -------
    None : None.

    References
    ----------

    http://seaborn.pydata.org/generated/seaborn.boxplot.html

    """

    logger.info("Generating Box Plot")

    # Draw the boxes, then trim the spines
    axes = sns.boxplot(x=x, y=y, hue=hue, data=df)
    sns.despine(offset=10, trim=True)

    # Save the figure that owns the axes
    write_plot('seaborn', axes.get_figure(), 'box_plot', tag, directory)


#
# Function plot_swarm
#
figure.alpha_diversity.py 文件源码 项目:microbiomeHD 作者: cduvallet 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def plot_alphadf(alphasdf, col_order, labeldict, metric='alpha'):
    """
    Plot faceted alpha diversity, one facet per study.

    Each facet shows a box plot with an overlaid strip plot of 'alpha' per
    'DiseaseState'. Afterwards the y-limits and ticks of every facet are
    tidied up and the facet titles are replaced using ``labeldict``.

    Parameters
    ----------
    alphasdf : pandas DataFrame
        columns ['study', 'alpha', 'DiseaseState']
    col_order : list
        dataset IDs in the order they should be plotted
    labeldict : dict
        dictionary with {dataset: label}
    metric : str
        alpha diversity metric, to use in labeling y axis

    Returns
    -------
    fig : Figure
    """
    sns.set_style('white')
    g = sns.FacetGrid(alphasdf, col='study', col_wrap=6,
                      col_order=col_order, sharex=False, sharey=False)
    g = g.map(sns.boxplot, "DiseaseState", "alpha")
    g = g.map(sns.stripplot, "DiseaseState", "alpha", split=True, jitter=True,
              size=5, linewidth=0.6)

    fig = plt.gcf()
    fig.set_size_inches(14.2, 9)

    # Fix y-axis gridlines
    for i, ax in enumerate(g.axes):
        yticks = ax.get_yticks()
        lowest, highest = yticks[0], yticks[-1]
        # Lowest tick strictly between 0 and 1 marks a fractional scale
        # (i.e. simpson); every other scale is snapped to whole numbers.
        if not (0 < lowest < 1):
            ax.set_ylim(floor(lowest), floor(highest))
        if lowest < 0:
            ax.set_ylim(0, floor(highest))

        # Re-read the ticks: the limits may just have changed. Then thin them.
        yticks = ax.get_yticks()
        if 0 < yticks[0] < 1:
            ax.set_yticks(yticks[1::2])
        else:
            ax.set_yticks(yticks[::2])
            # Need some space on the y-axis for p-values
            bottom, top = ax.get_ylim()[0], ax.get_ylim()[1]
            ax.set_ylim(bottom, 1.2*top)

        # Facet titles look like "study = <id>"; swap in the readable label
        dataset = ax.get_title().split('=')[1].strip()
        ax.set_title(labeldict[dataset])

        # Only the first facet of each row of six gets a y label
        if i % 6 == 0:
            ax.set_ylabel(metric)

    plt.tight_layout()
    return fig
figure.ubiquity_abundance_boxplots.py 文件源码 项目:microbiomeHD 作者: cduvallet 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def plot_ubiq_abun_boxplot(tidy, metric, calculation):
    """
    Plot boxplot where x-axis is 'overall_significance' of genus, and values
    are either ubiquity or abundance in tidy (with the respective metric and
    calculation type). Only rows with patient == "total" are plotted, and
    abundance values are shown as log10.

    Parameters
    ----------
    tidy : pandas dataframe
        has columns overall_significance, value, patient, metric, and calculation
    metric : str
        'abundance' or 'ubiquity'
    calculation: str
        'from_pooled_mean' or 'mean_of_datasets'

    Returns
    -------
    fig : Figure object
    ax : Axis object
    """
    fig, ax = plt.subplots(figsize=(5.5,4))
    # Explicit copy: the log transform below must not write into (or warn
    # about) a selection of the caller's frame.
    tmp = tidy.query('metric == @metric')\
              .query('calculation == @calculation')\
              .query('patient == "total"')\
              .copy()

    boxprops = {'edgecolor': 'k', 'facecolor': 'w'}
    lineprops = {'color': 'k'}
    order = ['health', 'disease', 'mixed', 'not_sig']

    # Plot log10(abundance)
    if metric == 'abundance':
        tmp['value'] = tmp['value'].apply(np.log10)

    # White boxes with black outlines; fliers are hidden because every
    # point is drawn by the strip plot below.
    sns.boxplot(data=tmp, x='overall_significance', y='value',
                fliersize=0, ax=ax, color='w',
                order=order,
                boxprops=boxprops, medianprops=lineprops,
                whiskerprops=lineprops, capprops=lineprops)

    sns.stripplot(data=tmp, x='overall_significance', y='value',
                  jitter=True, linewidth=0.6, split=True, ax=ax,
                  order=order,
                  color='w')
    return fig, ax
visualization.py 文件源码 项目:Default-Credit-Card-Prediction 作者: AlexPnt 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def visualize_boxplots(X,y):
    """
    Visualize the boxplots of the features

    Six horizontal box plots are shown one after another, each covering a
    contiguous group of columns of X.

    Keyword arguments:
    X -- The feature vectors (2-D array, read through columns 0..22)
    y -- The target vector (not used here)
    """
    # (column slice of X, column names, plot title) for every panel
    panels = [
        (slice(0, 1), ["Credit"],
         'BoxPlot Distribution of Credit'),
        (slice(1, 4), ["Gender","Education","Marital Status"],
         'BoxPlot Distribution of Features: Gender, Education and Marital Status'),
        (slice(4, 5), ["Age"],
         'BoxPlot Distribution of Age'),
        (slice(5, 11), ["X6","X7","X8","X9","X10","X11"],
         'BoxPlot Distribution of Features: History of Payment'),
        (slice(11, 17), ["X12","X13","X14","X15","X16","X17"],
         'BoxPlot Distribution of Features: Amount of Bill Statements'),
        # Columns 17..22; this panel used to re-plot the X12..X17 data.
        (slice(17, 23), ["X18","X19","X20","X21","X22","X23"],
         'BoxPlot Distribution of Features: Amount of Previous Payments'),
    ]

    for columns, names, title in panels:
        df=pd.DataFrame(data=X[:,columns],columns=names)
        sea.boxplot(data=df, orient="h",palette="husl")
        plt.title(title)
        plt.show()


问题


面经


文章

微信
公众号

扫码关注公众号