main.py 文件源码

python
阅读 21 收藏 0 点赞 0 评论 0

项目:xplore 作者: fahd09 项目源码 文件源码
def explore_feature_variation(self, col=None, use_target=False, **kwargs):
        '''
        Produces univariate plots of a given set of columns. Barplots are used
        for categorical columns while histograms (with fitted density functinos)
        are used for numerical columns.

        If use_target is true, then the variation of the given set of columns
        with respect to the response variable are used (e.g., 2d scatter 
        plots, boxplots, etc).

        Parameters
        ----------
        col : a string of a column name, or a list of many columns names or
                None (default). If col is None, all columns will be used.
        use_target : bool, default False
            Whether to use the target column in the plots.
        **kwargs: additional arguments to be passed to seaborn's distplot or
            to pandas's plotting utilities..
        '''            
        self._validate_params(params_list   = {'col':col},
                              expected_types= {'col':[str,list,type(None)]})        


        if type(col) is str: col = [col]
        if col is None: col = self._get_all_features()
        if use_target == False:
            for column in col:
                if self.is_numeric(self.df[column]) == True:
                    plt.figure(column)
                    #sns.despine(left=True)        
                    sns.distplot(self.df[column], color="m", **kwargs) 
                    plt.title(column)
                    plt.tight_layout()            
                    #plt.figure('boxplot')
                    #sns.boxplot(x=self.df[col], palette="PRGn")
                    #sns.despine(offset=10, trim=True)     
                elif self.is_categorical(self.df[column]) == True:            
                    #print self.df[column].describe()
                    plt.figure(column)
                    #sns.despine(left=True)    
                    if len(self.df[column].unique()) > 30:
                        self.df[column].value_counts()[:20][::-1].plot.barh(**kwargs)
                        #top = pd.DataFrame(data=top)
                        #sns.barplot(y=top.index, x=top)                        
                    else:
                        self.df[column].value_counts()[::-1].plot.barh(**kwargs)
                        #sns.countplot(y=self.df[column])                    
                    plt.title(column)
                    plt.tight_layout()
                else:
                    raise TypeError('TYPE IS NOT SUPPORTED')
        else: # use target variable
            for column in col:
                self.explore_features_covariation(col1=column, col2=self.y, **kwargs)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号