def work(self, fig=None, ax=None):
    """Draw a two dimensional kernel density plot.

    You can specify either a figure or an axis to draw on.  When neither is
    given, nothing is drawn and the arguments are returned unchanged.

    Parameters:
    -----------
    fig: matplotlib figure object
    ax: matplotlib axis object to draw on

    Returns:
    --------
    fig, ax: matplotlib figure and axis objects
    """
    if ax is None:
        if fig is None:
            # Nothing to draw on.
            return fig, ax
        ax = fig.gca()
    import scipy.stats as stats

    x = self.data[self.aes['x']]
    y = self.data[self.aes['y']]
    x_min, x_max = x.min(), x.max()
    y_min, y_max = y.min(), y.max()
    # 200 x 200 evaluation grid: axis 0 of X/Y runs along x, axis 1 along y.
    X, Y = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
    positions = np.vstack([X.ravel(), Y.ravel()])
    kernel = stats.gaussian_kde(np.vstack([x, y]))
    Z = np.reshape(kernel(positions), X.shape)
    # Z is indexed [x, y].  contour(Z, extent=...) would read rows as y and
    # draw the density transposed, so hand over the grid coordinates instead.
    ax.contour(X, Y, Z)
    return fig, ax
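# Example source code for the Python class gaussian_kde()
# rplot.py file source
# Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
# Author: SignalMedia
# Project source
# File source
# Views: 28
# Favorites: 0
# Likes: 0
# Comments: 0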
def plot_pdf(x, cov_factor=None, *args, **kwargs):
    """Plot a Gaussian kernel density estimate of ``x`` with pyplot.

    x: sequence of sample values
    cov_factor: optional bandwidth factor; None keeps gaussian_kde's default
    *args, **kwargs: forwarded unchanged to ``matplotlib.pyplot.plot``

    The density is evaluated at 200 evenly spaced points between min(x) and
    max(x).  Returns None.
    """
    import matplotlib.pyplot as plt
    from scipy.stats import gaussian_kde

    # bw_method takes the scalar factor directly, so there is no need to
    # patch covariance_factor and call the private _compute_covariance().
    density = gaussian_kde(x, bw_method=cov_factor)
    xgrid = np.linspace(min(x), max(x), 200)
    plt.plot(xgrid, density(xgrid), *args, **kwargs)
def plot_pdf(x, cov_factor=None, *args, **kwargs):
    """Draw the Gaussian KDE of ``x`` on the current pyplot axes.

    x: sequence of sample values
    cov_factor: bandwidth factor handed to gaussian_kde as ``bw_method``;
        None selects gaussian_kde's default
    *args, **kwargs: passed through to ``matplotlib.pyplot.plot``

    The curve is sampled at 200 evenly spaced points spanning min(x)..max(x).
    Returns None.
    """
    import matplotlib.pyplot as plt
    from scipy.stats import gaussian_kde

    # Use the public bw_method argument rather than overriding
    # covariance_factor and invoking the private _compute_covariance().
    density = gaussian_kde(x, bw_method=cov_factor)
    xgrid = np.linspace(min(x), max(x), 200)
    y = density(xgrid)
    plt.plot(xgrid, y, *args, **kwargs)
def model_accuracy(results, scatter_lims, error_density_lims, output_file):
    """Save a two-panel accuracy figure to ``output_file``.

    Left panel: scatter of results['actuals'] against results['estimates']
    plus a line from 0 to the largest value of either column, with both axes
    limited to ``scatter_lims``.
    Right panel: Gaussian KDE of results['error'] for rows whose estimate is
    positive, evaluated at 10000 points between error_density_lims[0] and
    error_density_lims[1].
    """
    f, (scatter_ax, density_ax) = plt.subplots(1, 2, figsize=(20, 10))

    # Left: actuals vs. estimates with a reference diagonal.
    scatter_ax.scatter(results['actuals'], results['estimates'])
    largest = results[['actuals', 'estimates']].max().max()
    scatter_ax.plot([0, largest], [0, largest])
    scatter_ax.set_xlim(scatter_lims)
    scatter_ax.set_ylim(scatter_lims)

    # Right: error density, restricted to positive estimates.
    positive = results['estimates'] > 0
    density = gaussian_kde(results['error'][positive])
    lower, upper = error_density_lims[0], error_density_lims[1]
    grid = np.linspace(lower, upper, 10000)
    density_ax.plot(grid, density(grid))

    plt.savefig(output_file)
def __init__(self, sample, label=None):
    """Build a Gaussian KDE for ``sample`` and a grid spanning its range.

    sample: sequence of data
    label: string; '_nolegend_' is stored when it is omitted
    """
    if label is None:
        label = '_nolegend_'
    self.label = label
    self.kde = stats.gaussian_kde(sample)
    # 101 evenly spaced points from the smallest to the largest observation.
    self.linspace = np.linspace(min(sample), max(sample), 101)
def plot_rugdensity(series, name=None, ylab=None, xlab=None):
    """Build a figure with a density curve and, when possible, a rug trace.

    series: sample values
    name: trace name, also passed to std_layout
    ylab, xlab: passed to std_layout

    When ``series`` holds more than one value, a Gaussian KDE is evaluated at
    100 points over the data range and multiplied by the series maximum, and
    a rug trace of 'line-ns-open' markers at y=0 is added.  Otherwise a single
    trace with x=0 and y=series is produced.  Returns the Figure.
    """
    rug_traces = []
    if len(series) > 1:
        kde = gaussian_kde(series)
        x = np.linspace(np.min(series), np.max(series), 100)
        y = kde.evaluate(x) * np.max(series)

        rug_marker = Marker(
            color='rgba(0,0,0,0.9)',
            symbol='line-ns-open',
            size=10,
            opacity=0.5
        )
        rug_traces.append(
            Scatter(
                x=series,
                y=[0] * len(series),
                mode='markers',
                marker=rug_marker,
                name=name
            )
        )
    else:
        x, y = 0, series

    density_trace = Scatter(
        x=x,
        y=y,
        line=Line(color='rgba(0,0,0,0.9)'),
        hoverinfo='x',
        name=name,
    )

    # The density trace always comes first; the rug follows when it exists.
    traces = [density_trace] + rug_traces
    return Figure(data=traces, layout=std_layout(name, ylab, xlab))
def featureMapDensity(lang='py', normed=True):
    """Plot a Gaussian KDE of the frequency table returned for ``lang``.

    lang: key passed to getValue(lang, type='freq'); also used as plot label
    normed: accepted for backward compatibility; not used by this function

    Each index i of the frequency table is repeated freq[i] times to form the
    raw sample.  A KDE with bw_method=0.5 is fitted to it and drawn with
    plt.plot at 200 points between 1 and 115.  Returns None.
    """
    freq = getValue(lang, type='freq')
    # Collect in a list and convert once: np.append copies the whole array on
    # every call, which made the original loop quadratic.
    samples = []
    for i in range(len(freq)):
        samples.extend([i] * freq[i])
    raw = np.array(samples, dtype=float)
    # Single-argument print with %-formatting prints the same text on
    # Python 2 and Python 3.
    print("[SUCCESS] Calculated raw for %s" % (lang,))
    gaussKDE = gaussian_kde(raw, bw_method=0.5)
    ind = np.linspace(1, 115, 200)
    plt.plot(ind, gaussKDE(ind), label="%s" % lang)
def __init__(self, param_priors, param_hyperparameters, n_iter):
    """Set up a joint KDE for float parameters and wrappers for categoricals.

    param_priors: mapping from parameter name to its prior values
    param_hyperparameters: mapping from parameter name to hyperparameter
    n_iter: stored on self.n_iter

    Float hyperparameters contribute one row each (log2-transformed when the
    hyperparameter's ``log`` flag is set) to the data the KDE is fitted on;
    their names are recorded in self.param_index.  Categorical ones are
    stored in self.distributions.

    Raises ValueError for a UniformIntegerHyperparameter, for any other
    unsupported hyperparameter, or when fewer than two float hyperparameters
    are present.
    """
    self.hyperparameters = collections.OrderedDict(sorted(param_hyperparameters.items()))
    self.n_iter = n_iter
    self.param_index = []
    self.distributions = {}

    stacked = None
    for name, hyperparameter in param_hyperparameters.items():
        if isinstance(hyperparameter, UniformFloatHyperparameter):
            self.param_index.append(name)
            values = np.array(param_priors[name])
            if hyperparameter.log:
                values = np.log2(values)
            # One (1, n) row per float parameter, stacked along axis 0.
            row = np.reshape(np.array(values), (1, len(values)))
            if stacked is None:
                stacked = row
            else:
                stacked = np.concatenate((stacked, row), axis=0)
        elif isinstance(hyperparameter, UniformIntegerHyperparameter):
            raise ValueError('UniformIntegerHyperparameter not yet implemented:', name)
        elif isinstance(hyperparameter, CategoricalHyperparameter):
            self.distributions[name] = openmlpimp.utils.rv_discrete_wrapper(name, param_priors[name])
        else:
            raise ValueError()

    if len(self.param_index) < 2:
        raise ValueError('Need at least 2 float hyperparameters')
    self.kde = gaussian_kde(stacked)
def __init__(self, sample, label=None):
    """Fit a Gaussian KDE to ``sample`` and prepare an evaluation grid.

    sample: sequence of data
    label: string; falls back to '_nolegend_' when None
    """
    self.label = '_nolegend_' if label is None else label
    self.kde = stats.gaussian_kde(sample)
    bounds = (min(sample), max(sample))
    # Evaluation grid: 101 points covering the observed range.
    self.linspace = np.linspace(bounds[0], bounds[1], num=101)
def __init__(self, sample, label=None):
    """Estimate the density of ``sample`` with a Gaussian KDE.

    sample: sequence of data
    label: string; '_nolegend_' is used when no label is given
    """
    if label is not None:
        self.label = label
    else:
        self.label = '_nolegend_'
    self.kde = stats.gaussian_kde(sample)
    start, stop = min(sample), max(sample)
    # Grid of 101 points between the sample extremes.
    self.linspace = np.linspace(start, stop, num=101)
def weighted_kde(self, data, weights, xs):
    """Evaluate a Gaussian KDE of integer-weighted ``data`` at ``xs``.

    data: sequence of observations
    weights: non-negative integer repeat counts, one per observation
    xs: points at which the density is evaluated

    Each observation data[i] is replicated weights[i] times and a
    gaussian_kde is fitted to the replicated sample.  Returns the density
    values at ``xs``.
    """
    # np.repeat performs the replication in one vectorised call instead of a
    # nested Python-level loop over indices.
    data_wt = np.repeat(np.asarray(data), weights, axis=0)
    density = gaussian_kde(data_wt)
    return density(xs)
def __init__(self, pe):
    """Copy settings from ``pe`` and precompute a normalised length PDF.

    pe: object providing MINPE, global_lens, target_lens and ref

    A Gaussian KDE of pe.global_lens is evaluated on np.arange(SPAN) and
    divided by its sum, so self.pdf sums to one over that grid.
    """
    self.MINPE = pe.MINPE
    self.x_grid = np.arange(SPAN)

    # Density of the global lengths, normalised over the grid.
    density = gaussian_kde(pe.global_lens).evaluate(self.x_grid)
    self.pdf = density / density.sum()

    self.target_lens = pe.target_lens
    self.ref = pe.ref
    self.db = {}
def kdepeak(x, x_grid=None):
    '''
    Evaluate a Gaussian kernel density estimate of ``x`` on a grid.

    Parameters
    ----------
    x : sample values the KDE is fitted to
    x_grid : points at which to evaluate the density; when None, 201 evenly
        spaced points between min(x) and max(x) are used

    Returns
    -------
    x_grid, density : the evaluation grid and the KDE values on it
    '''
    # Identity test: ``x_grid == None`` compares element-wise for ndarrays
    # and makes the ``if`` raise on any grid with more than one point.
    if x_grid is None:
        x_grid = np.linspace(np.min(x), np.max(x), 201)
    kde = gaussian_kde(x)
    return x_grid, kde.evaluate(x_grid)
def visual_get_kde(self):
    """Return a grid and the Gaussian KDE of the flattened ``__mu`` values.

    The KDE uses a fixed bandwidth factor of 0.25 and is evaluated at 200
    evenly spaced points between the smallest and largest value.

    Returns (xs, density_at_xs).
    """
    mu = self.__mu.ravel()
    # bw_method sets the scalar factor through the public API; the earlier
    # approach patched covariance_factor and called the private
    # _compute_covariance().
    density = gaussian_kde(mu, bw_method=.25)
    xs = np.linspace(mu.min(), mu.max(), 200)
    return xs, density(xs)
def create_x_pdf(self, where=None, color=None):
    """Plot the KDE of the selected x values on the crossplot axes.

    where: index or mask selecting entries of self.x; defaults to self.good
    color: RGBA sequence; the first three entries are used as the line colour
        and the last one as its alpha.  Defaults to opaque grey, the RGBA
        form of '#bbbbbb'.

    The density is evaluated at 100 points across self.xlim, rescaled so its
    peak sits at ymin + self._FAC * (ymax - ymin), and plotted on
    self.crossplot_ax.  The resulting line is appended to self.x_pdfs and the
    axis limits are reset to self.xlim / self.ylim.
    """
    if where is None:
        where = self.good
    if color is None:
        # The colour is split into RGB + alpha below, so the default has to
        # be an RGBA sequence; a '#bbbbbb' string would be sliced to
        # '#bb' and 'b'.
        grey = 0xbb / 255.0
        color = (grey, grey, grey, 1.0)
    xmin, xmax = self.xlim
    ymin, ymax = self.ylim
    x = np.linspace(xmin, xmax, 100)
    x_kde = stats.gaussian_kde(self.x[where])
    x_pdf = x_kde(x)
    # Normalise to a peak of 1, then map onto a fraction of the y-range.
    x_pdf_n = x_pdf/np.nanmax(x_pdf)*(ymax - ymin)*self._FAC + ymin
    line, = self.crossplot_ax.plot(x, x_pdf_n, color=color[:3],
                                   alpha=color[-1], zorder=-1)
    self.x_pdfs.append(line)
    self.crossplot_ax.set_xlim(*self.xlim)
    self.crossplot_ax.set_ylim(*self.ylim)
def create_xy_pdf(self, where=None, color=None):
    """Draw contours of the 2-D KDE of the selected (x, y) points.

    where: index or mask selecting entries of self.x / self.y; defaults to
        self.good
    color: RGBA sequence; the first three entries are passed as the contour
        ``colors`` and the last one as alpha.  Defaults to opaque grey, the
        RGBA form of '#bbbbbb'.

    The density is evaluated on a 100 x 100 grid spanning self.xlim and
    self.ylim and drawn with 5 contour levels on self.crossplot_ax.  The
    contour set is appended to self.xy_pdfs and the axis limits are reset to
    self.xlim / self.ylim.
    """
    if where is None:
        where = self.good
    if color is None:
        # The colour is split into RGB + alpha below, so the default has to
        # be an RGBA sequence; a '#bbbbbb' string would be sliced to
        # '#bb' and 'b'.
        grey = 0xbb / 255.0
        color = (grey, grey, grey, 1.0)
    xmin, xmax = self.xlim
    ymin, ymax = self.ylim
    X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
    positions = np.vstack([X.ravel(), Y.ravel()])
    values = np.vstack([self.x[where], self.y[where]])
    xy_kde = stats.gaussian_kde(values)
    xy_pdf = np.reshape(xy_kde(positions), X.shape)
    contours = self.crossplot_ax.contour(X, Y, xy_pdf, 5, colors=color[:3],
                                         alpha=color[-1], zorder=-2)
    self.xy_pdfs.append(contours)
    self.crossplot_ax.set_xlim(*self.xlim)
    self.crossplot_ax.set_ylim(*self.ylim)
def create_x_pdf(self, where=None, color=None):
    """Draw the 1-D KDE of the selected x values on self.crossplot_ax.

    where: index or mask applied to self.x; self.good is used when None
    color: RGBA sequence; entries [:3] give the line colour and the last
        entry its alpha.  When None, opaque grey (the RGBA equivalent of
        '#bbbbbb') is used.

    The KDE is sampled at 100 points over self.xlim and rescaled so that its
    maximum lies at ymin + self._FAC * (ymax - ymin).  The plotted line is
    appended to self.x_pdfs, after which the axis limits are restored to
    self.xlim / self.ylim.
    """
    if where is None:
        where = self.good
    if color is None:
        # Must be an RGBA sequence because it is sliced into RGB and alpha
        # further down; slicing the string '#bbbbbb' yields '#bb' and 'b'.
        level = 0xbb / 255.0
        color = (level, level, level, 1.0)
    xmin, xmax = self.xlim
    ymin, ymax = self.ylim
    x = np.linspace(xmin, xmax, 100)
    x_pdf = stats.gaussian_kde(self.x[where])(x)
    # Peak-normalise, then scale into the lower part of the y-range.
    x_pdf_n = x_pdf/np.nanmax(x_pdf)*(ymax - ymin)*self._FAC + ymin
    pdf_line, = self.crossplot_ax.plot(x, x_pdf_n, color=color[:3],
                                       alpha=color[-1], zorder=-1)
    self.x_pdfs.append(pdf_line)
    self.crossplot_ax.set_xlim(*self.xlim)
    self.crossplot_ax.set_ylim(*self.ylim)
def fit(self, data):
    """Fit a Gaussian KDE to the transpose of ``data``.

    data: array-like exposing ``.T``; the transposed array is the dataset
        handed to gaussian_kde, with self.bw as its bandwidth method

    Stores the estimator on self.gkde and returns None.
    """
    dataset = data.T
    self.gkde = stats.gaussian_kde(dataset, bw_method=self.bw)
def _step(self, action):
    """Step the wrapped env and add a visit-count based exploration bonus.

    action: forwarded to self.env.step

    When info carries a 'location', the kernel-weighted visit count around
    that location is turned into a log-probability.  The bonus
    explore_scale * (previous logprob - current logprob) is added to the
    reward and the visit statistics are updated.  Without a location the env
    result is returned untouched.

    Returns (obs, reward, done, info).
    """
    obs, reward, done, info = self.env.step(action)
    location = info.get('location')
    if location is None:
        return obs, reward, done, info

    # Shift by the patch breadth to index into self.counts ("padding").
    padded = location + self.breadth
    index = tuple(padded.tolist())
    patch = extract_patch(self.counts, index, self.breadth)
    count = (self.kernel * patch).sum()
    info['log/visits'] = count

    logprob = np.log(count / self.total)
    info['log/visit_logprob'] = logprob

    # Reward moving to a location less visited than the previous one.
    bonus = self.explore_scale * (self.logprob - logprob)
    info['log/explore_bonus'] = np.abs(bonus)
    reward += bonus
    self.logprob = logprob

    # Either decay all counts or grow the running total, then record the visit.
    if self.decay:
        self.counts *= self.decay
    else:
        self.total += 1
    self.counts[index] += 1
    return obs, reward, done, info
def _step(self, action):
    """Run one env step, rewarding visits to less frequently seen locations.

    action: passed straight to self.env.step

    If info contains a 'location', a kernel-weighted visit count is taken
    from self.counts around it, converted to a log-probability using
    self.total, and the change relative to the previous log-probability
    (scaled by self.explore_scale) is added to the reward.  Diagnostic values
    are written to info under the 'log/...' keys.

    Returns (obs, reward, done, info).
    """
    obs, reward, done, info = self.env.step(action)
    raw_location = info.get('location')
    if raw_location is not None:
        # Offset by the breadth so the patch lookup stays inside self.counts.
        shifted = raw_location + self.breadth
        index = tuple(shifted.tolist())
        neighbourhood = extract_patch(self.counts, index, self.breadth)
        visits = (self.kernel * neighbourhood).sum()
        info['log/visits'] = visits
        visit_logprob = np.log(visits / self.total)
        info['log/visit_logprob'] = visit_logprob
        bonus = self.explore_scale * (self.logprob - visit_logprob)
        info['log/explore_bonus'] = np.abs(bonus)
        reward += bonus
        self.logprob = visit_logprob
        if not self.decay:
            # No decay configured: keep a running total of visits instead.
            self.total += 1
        else:
            self.counts *= self.decay
        self.counts[index] += 1
    return obs, reward, done, info