def action_distribution(actions, ax=None, file="action_ditribution.png"):
    """Plot a histogram of ``actions`` and save it to ``file``.

    Args:
        actions: Data handed to ``sb.distplot`` (drawn with ``kde=False``).
        ax: Optional axes to draw on. When omitted, a new 10x10 inch figure
            is created for the plot and closed again after saving.
        file: Path the figure is written to with ``savefig``.
    """
    owns_figure = ax is None
    if owns_figure:
        fig = plt.figure(figsize=(10, 10))
        ax = fig.gca()
    else:
        # Draw into the caller's figure instead of creating (and saving) an
        # unrelated empty one.
        fig = ax.figure

    sb.distplot(actions, kde=False, ax=ax)
    # Label the axes that actually hold the histogram, not whatever the
    # current pyplot axes happen to be.
    ax.set_ylabel("probability")
    ax.set_xlabel("action")
    ax.set_title("Action distribution")
    fig.savefig(file)

    # Only close figures created here; a caller-supplied axes stays usable.
    if owns_figure:
        plt.close(fig)
# Example source code for Python's distplot()
def update(data, ax, xlim, ylim, vl):
    """Clear ``ax`` and redraw the distribution of ``data`` on it.

    Args:
        data: Values passed to ``sns.distplot``.
        ax: Axes that is cleared and drawn on.
        xlim: X-axis limits; applied only when truthy.
        ylim: Y-axis limits; applied only when truthy.
        vl: X position of a dashed black vertical line, or ``None`` to
            draw no line.

    Returns:
        The same ``ax`` that was passed in.
    """
    ax.clear()
    sns.distplot(data, ax=ax)

    if xlim:
        ax.set_xlim(xlim)
    if ylim:
        ax.set_ylim(ylim)

    if vl is None:
        return ax

    # Span the marker line over the full (possibly just updated) y-range.
    y_span = ax.get_ylim()
    ax.plot([vl, vl], y_span, "k--")
    return ax
def generate_test_figures_2d_histogram(cls):
    """Generate a tuple of histogram figures from random series.

    For each of ``cls.n_graphical_tests // 2`` iterations one of three
    random generators is picked with ``np.random.choice``, its output is
    wrapped in a ``pd.Series`` and handed, together with a
    ``sns.distplot(..., kde=False)`` plotting callable, to
    ``cls.figure_from_plot_function``.

    Returns:
        tuple: the values returned by ``cls.figure_from_plot_function``.
    """
    def normal_values():
        return np.random.normal(size=cls.n_lines_test_pandas)

    def scaled_ramp():
        return np.random.randint(0, 99999) * np.arange(cls.n_lines_test_pandas)

    def scaled_constant():
        return np.random.randint(0, 99999) * np.ones(cls.n_lines_test_pandas)

    generators = (normal_values, scaled_ramp, scaled_constant)
    plot_histogram = lambda x: sns.distplot(x, kde=False)  # noqa: E731

    figures = []
    # Only half of the graphical tests are generated for now; the divisor is
    # meant to grow beyond 2 once all plots are ready.
    for _ in range(cls.n_graphical_tests // 2):
        series = pd.Series(np.random.choice(generators)())
        figures.append(cls.figure_from_plot_function(plot_histogram, series))
    return tuple(figures)
def histogram_of_floats(a, *args, **sns_distplot_kwargs):
    """Plot a histogram of floats by delegating to ``sns.distplot``.

    Arguments:
        a (pd.Series): Float series to create a histogram plot from.
        *args: Extra positional arguments forwarded to ``sns.distplot``.
        **sns_distplot_kwargs: Keyword arguments forwarded to
            ``sns.distplot``.

    Returns:
        matplotlib.axes.Axes: the plotted axes, i.e. whatever
        ``sns.distplot`` returns.

    Examples:
        >>> import pandas_utilities as pu
        >>> float_serie = pu.dummy_dataframe().float_0
        >>> fig = plt.figure()
        >>> axes = histogram_of_floats(float_serie, kde=False)
        >>> isinstance(axes, matplotlib.axes.Axes)
        True
        >>> fig.savefig('/tmp/doctest_{0}.png'.format('histogram_of_floats'), dpi=500)
    """
    return sns.distplot(a, *args, **sns_distplot_kwargs)
def display_distrib(pd, feature):
    """Show the distribution of one column with a fitted normal curve.

    Args:
        pd: Table-like object indexed by column name. Note that this
            parameter name shadows the usual ``pandas`` alias inside the
            function; it is kept for backward compatibility.
        feature: Name of the column to plot. Missing values are dropped
            before plotting and fitting.
    """
    values = pd[feature].dropna()

    plt.figure()
    sns.distplot(values, fit=norm)
    mu, sigma = norm.fit(values)
    # Raw string: "\m" and "\s" are not valid escape sequences in a normal
    # string literal; the resulting text is unchanged.
    legend_label = r'Normal dist. ($\mu=$ {:.2f} and $\sigma=$ {:.2f} )'.format(mu, sigma)
    plt.legend([legend_label], loc='best')
    plt.ylabel('Frequency')
    # NOTE(review): the title is hard-coded and does not follow `feature`.
    plt.title('SalePrice distribution')
    plt.show()
def plot_fd(fd_file, fd_radius, mean_fd_dist=None, figsize=DINA4_LANDSCAPE):
    """Build a figure of the frame displacement trace and its distribution.

    Args:
        fd_file: Passed to ``_calc_fd`` together with ``fd_radius``.
        fd_radius: Passed to ``_calc_fd``.
        mean_fd_dist: Optional values whose distribution is drawn in a second
            row, with a vertical line at the mean of the computed trace.
            Skipped when falsy.
        figsize: Figure size handed to ``plt.Figure``.

    Returns:
        The created figure.
    """
    fd_power = _calc_fd(fd_file, fd_radius)

    fig = plt.Figure(figsize=figsize)
    FigureCanvas(fig)

    show_group_dist = bool(mean_fd_dist)
    if show_group_dist:
        grid = GridSpec(2, 4)
    else:
        grid = GridSpec(1, 2, width_ratios=[3, 1])
        # NOTE(review): spacing tweak assumed to apply to the single-row
        # layout only -- confirm against the original indentation.
        grid.update(hspace=1.0, right=0.95, left=0.1, bottom=0.2)

    # Trace: every grid column of the first row except the last one.
    trace_ax = fig.add_subplot(grid[0, :-1])
    trace_ax.plot(fd_power)
    trace_ax.set_xlim((0, len(fd_power)))
    trace_ax.set_ylabel("Frame Displacement [mm]")
    trace_ax.set_xlabel("Frame number")
    trace_ylim = trace_ax.get_ylim()

    # Vertical distribution in the last column, on the trace's y-range.
    hist_ax = fig.add_subplot(grid[0, -1])
    sns.distplot(fd_power, vertical=True, ax=hist_ax)
    hist_ax.set_ylim(trace_ylim)

    if not show_group_dist:
        return fig

    group_ax = fig.add_subplot(grid[1, :])
    sns.distplot(mean_fd_dist, ax=group_ax)
    group_ax.set_xlabel("Mean Frame Displacement (over all subjects) [mm]")
    mean_fd = fd_power.mean()
    label = r'$\overline{{\text{{FD}}}}$ = {0:g}'.format(mean_fd)
    plot_vline(mean_fd, label, ax=group_ax)
    return fig
def plot_distribution(data, title):
    """Show the distribution of ``data`` with a rug plot under ``title``.

    Args:
        data: Iterable of values; it is materialised into a NumPy array
            before plotting.
        title: Title set on the current plot.
    """
    values = np.array(list(data))
    sns.distplot(values, rug=True)
    plt.title(title)
    plt.show()
def plot_filter_densities(densities, filename=None):
    """Draw a histogram of ``densities`` with the x-axis labelled 'Activation'.

    Args:
        densities: Values passed to ``sns.distplot`` (``kde=False``).
        filename: Optional output path. When truthy the figure is saved
            there and then closed.
    """
    sns.set(font_scale=1.3)
    fig, ax = plt.subplots()
    sns.distplot(densities, kde=False, ax=ax)
    ax.set_xlabel('Activation')

    if not filename:
        return

    fig.savefig(filename)
    # NOTE(review): closing is assumed to happen only after saving --
    # confirm against the original indentation.
    plt.close()
def explore_feature_variation(self, col=None, use_target=False, **kwargs):
    '''
    Produce univariate plots for a set of columns.

    Numerical columns are drawn as histograms with seaborn's distplot;
    categorical columns are drawn as horizontal bar plots of their value
    counts (only the first 20 counts when the column has more than 30
    distinct values). Each plot goes to a figure named after its column.

    If use_target is true, every column is instead passed to
    ``explore_features_covariation`` together with ``self.y``.

    Parameters
    ----------
    col : a string of a column name, or a list of many column names, or
        None (default). If col is None, all columns will be used.
    use_target : bool, default False
        Whether to use the target column in the plots.
    **kwargs : additional arguments to be passed to seaborn's distplot or
        to pandas's plotting utilities.

    Raises
    ------
    TypeError
        If a column is neither numeric nor categorical.
    '''
    self._validate_params(params_list={'col': col},
                          expected_types={'col': [str, list, type(None)]})
    if type(col) is str:
        col = [col]
    if col is None:
        col = self._get_all_features()

    if use_target == False:
        for column in col:
            if self.is_numeric(self.df[column]) == True:
                plt.figure(column)
                sns.distplot(self.df[column], color="m", **kwargs)
            elif self.is_categorical(self.df[column]) == True:
                plt.figure(column)
                too_many_levels = len(self.df[column].unique()) > 30
                counts = self.df[column].value_counts()
                if too_many_levels:
                    # Keep the plot readable: first 20 counts only.
                    counts = counts[:20]
                # Reverse before plotting, as barh draws the first entry
                # at the bottom.
                counts[::-1].plot.barh(**kwargs)
            else:
                raise TypeError('TYPE IS NOT SUPPORTED')
            plt.title(column)
            plt.tight_layout()
    else:
        # Use the target variable: delegate to the covariation plots.
        for column in col:
            self.explore_features_covariation(col1=column, col2=self.y, **kwargs)
def plot_score_distribution(y_pred, so):
    """Plot a histogram of predicted scores and save it as a PNG.

    Args:
        y_pred: Predicted scores.
        so: Mapping providing 'model_name', 'results_dir', 'timestamp' and
            'break_window'; these are concatenated into the output path,
            which ends in '_score_distribution.png'.
    """
    # The x-range always covers at least [0, 1].
    lower = min(min(y_pred), 0)
    upper = max(max(y_pred), 1)

    sns.distplot(y_pred, kde=False)
    plt.title("distribution of scores for {} model".format(so['model_name']))
    plt.xlabel("raw prediction score")
    plt.xlim([lower, upper])
    plt.ylabel("number of street segments")

    run_tag = so['model_name'] + "_" + str(so['timestamp']) + "_" + so['break_window']
    base = so['results_dir'] + run_tag
    plt.savefig(base + '_score_distribution.png', bbox_inches='tight')
    plt.close()
def plot_pdf(df):
    """Draw a distplot for every float or integer column of ``df``.

    Columns whose plot raises an exception are reported on stdout and
    skipped, so one bad column does not stop the remaining plots.

    Args:
        df: DataFrame whose numeric columns are plotted.
    """
    # Builtin float/int replace the np.float/np.int aliases, which were
    # plain aliases of the builtins and have been removed from NumPy.
    df_num = df.select_dtypes(include=[float, int])
    for index in df_num.columns:
        try:
            sns.distplot(df_num[index], color="m")
        except Exception:
            # Best effort: report the column and keep going.
            print("{} error (probably Nan)".format(index))
def continuous_plots(dataFrame, continuous_factors):
    """Draw one distribution plot per continuous factor.

    A grid with one row per factor and two columns is created; each
    factor is drawn in the first column of its own row, with a legend.

    Args:
        dataFrame: Table indexed by factor name.
        continuous_factors: Sequence of column names to plot.
    """
    # squeeze=False keeps the axes 2-D even for a single factor, so the
    # [row][0] lookup below is always valid.
    _, axes = plt.subplots(len(continuous_factors), 2, figsize=(8, 12),
                           squeeze=False)
    for row, factor in enumerate(continuous_factors):
        # Index by row: the grid has len(continuous_factors) rows but only
        # two columns, so indexing row 0 by factor position overflows.
        target_ax = axes[row][0]
        sns.distplot(dataFrame[factor], ax=target_ax, label=factor)
        target_ax.legend()
    plt.tight_layout()
def plot_dist(*args):
    """Draw each positional argument as a distplot in its own figure.

    Args:
        *args: Data sets; one new figure is opened per data set before all
            figures are shown.
    """
    import seaborn

    for sample in args:
        plt.figure()
        seaborn.distplot(sample)
    plt.show()
def plot_dist(*args):
    """Draw each positional argument as a distplot in its own figure.

    Args:
        *args: Data sets; one new figure is opened per data set before all
            figures are shown.
    """
    import seaborn

    for sample in args:
        plt.figure()
        seaborn.distplot(sample)
    plt.show()
def plot_mean_bootstrap():
    """Plot two sampled distributions of the mean of ``[-1, 0, 1]``.

    The first distplot shows the result of ``mean(X, 10000)``; the second
    shows ``np.mean`` applied to 10000 ``resample`` draws of the same data.
    """
    observations = [-1, 0, 1]

    sns.distplot(mean(observations, 10000))

    resampled_means = []
    for _ in range(10000):
        resampled_means.append(np.mean(resample(observations)))
    sns.distplot(resampled_means)

    plt.show()
def plot_mean_resample_bootstrap():
    """Plot two sampled distributions of the mean of ``[-1, 0, 1]``.

    The first distplot shows ``bayesian_bootstrap(X, np.mean, 10000, 100)``;
    the second shows ``np.mean`` applied to 10000 ``resample`` draws.
    """
    observations = [-1, 0, 1]

    sns.distplot(bayesian_bootstrap(observations, np.mean, 10000, 100))

    resampled_means = []
    for _ in range(10000):
        resampled_means.append(np.mean(resample(observations)))
    sns.distplot(resampled_means)

    plt.show()
def plot_median():
    """Plot two sampled distributions of the median of 10 uniform draws.

    The data are 10 values drawn uniformly from [-1, 1). The first distplot
    shows ``bayesian_bootstrap(X, np.median, 10000, 100)``; the second shows
    ``np.median`` applied to 10000 ``resample`` draws.
    """
    observations = np.random.uniform(-1, 1, 10)

    sns.distplot(bayesian_bootstrap(observations, np.median, 10000, 100))

    resampled_medians = []
    for _ in range(10000):
        resampled_medians.append(np.median(resample(observations)))
    sns.distplot(resampled_medians)

    plt.show()
def plot_var_bootstrap():
    """Plot two sampled distributions of the variance of 100 uniform draws.

    The data are 100 values drawn uniformly from [-1, 1). The first distplot
    shows the result of ``var(X, 10000)``; the second shows ``np.var``
    applied to 10000 ``resample`` draws.
    """
    observations = np.random.uniform(-1, 1, 100)

    sns.distplot(var(observations, 10000))

    resampled_variances = []
    for _ in range(10000):
        resampled_variances.append(np.var(resample(observations)))
    sns.distplot(resampled_variances)

    plt.show()
def plot_self_covar_bootstrap():
    """Plot the result of ``covar(X, X, 10000)`` for 100 uniform draws.

    The data are 100 values drawn uniformly from [-1, 1) and are passed as
    both arguments of ``covar``.
    """
    observations = np.random.uniform(-1, 1, 100)
    sns.distplot(covar(observations, observations, 10000))
    plt.show()
def plot_var_resample_bootstrap():
    """Plot two sampled distributions of the variance of 100 uniform draws.

    The data are 100 values drawn uniformly from [-1, 1). The first distplot
    shows ``bayesian_bootstrap(X, np.var, 10000, 500)``; the second shows
    ``np.var`` applied to 10000 ``resample`` draws.
    """
    observations = np.random.uniform(-1, 1, 100)

    sns.distplot(bayesian_bootstrap(observations, np.var, 10000, 500))

    resampled_variances = []
    for _ in range(10000):
        resampled_variances.append(np.var(resample(observations)))
    sns.distplot(resampled_variances)

    plt.show()