def explore_feature_variation(self, col=None, use_target=False, **kwargs):
    '''
    Plot the variation of one or more columns.

    When ``use_target`` is false, one univariate figure is drawn per
    column: a horizontal bar plot of value counts for categorical columns
    and a histogram with a fitted density function (seaborn ``distplot``)
    for numerical columns.
    When ``use_target`` is true, each column is instead handed, together
    with the target ``self.y``, to ``explore_features_covariation`` (e.g.,
    2d scatter plots, boxplots, etc).

    Parameters
    ----------
    col : a string of a column name, or a list of many column names, or
        None (default). If col is None, the columns returned by
        ``self._get_all_features()`` are used.
    use_target : bool, default False
        Whether to use the target column in the plots. Any falsy value
        (including None) selects the univariate plots.
    **kwargs : additional arguments forwarded to seaborn's distplot
        (numerical columns), to pandas's ``plot.barh`` (categorical
        columns), or to ``explore_features_covariation`` (when
        ``use_target`` is true).

    Raises
    ------
    TypeError
        If a column is neither numeric nor categorical according to
        ``self.is_numeric`` / ``self.is_categorical``.
    '''
    self._validate_params(params_list={'col': col},
                          expected_types={'col': [str, list, type(None)]})

    # Normalise `col` to a list of column names.
    if isinstance(col, str):
        col = [col]
    elif col is None:
        col = self._get_all_features()

    if use_target:
        for column in col:
            self.explore_features_covariation(col1=column, col2=self.y,
                                              **kwargs)
        return

    # Columns with more than `max_levels` distinct values only show their
    # `top_levels` most frequent values, to keep the bar plot readable.
    max_levels = 30
    top_levels = 20

    for column in col:
        series = self.df[column]
        if self.is_numeric(series):
            plt.figure(column)
            # NOTE(review): distplot is deprecated in recent seaborn
            # releases; it is kept because **kwargs is documented as being
            # forwarded to it and histplot/displot accept different options.
            sns.distplot(series, color="m", **kwargs)
        elif self.is_categorical(series):
            plt.figure(column)
            counts = series.value_counts()
            if len(series.unique()) > max_levels:
                counts = counts.head(top_levels)
            # value_counts() sorts descending; reverse so that the most
            # frequent value ends up at the top of the horizontal bar plot.
            counts.iloc[::-1].plot.barh(**kwargs)
        else:
            raise TypeError(
                'TYPE IS NOT SUPPORTED: column {!r}'.format(column))
        plt.title(column)
        plt.tight_layout()
评论列表
文章目录