from scipy.stats import gaussian_kde

def kde_scipy(data, grid, **kwargs):
    """
    Kernel Density Estimation with Scipy

    Parameters
    ----------
    data : numpy.array
        Data points used to compute a density estimator. It
        has `n x p` dimensions, representing n points and p
        variables.
    grid : numpy.array
        Data points at which the density will be estimated. It
        has `m x p` dimensions, representing m points and p
        variables.

    Returns
    -------
    out : numpy.array
        Density estimate. Has `m x 1` dimensions.
    """
    kde = gaussian_kde(data.T, **kwargs)
    return kde.evaluate(grid.T)
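A minimal usage sketch (not part of the original source; assumes only NumPy): estimate the density of 500 two-dimensional points at 50 query points along a line.

import numpy as np

rng = np.random.RandomState(0)
data = rng.normal(size=(500, 2))                      # n=500 points, p=2 variables
grid = np.c_[np.linspace(-3, 3, 50), np.zeros(50)]    # m=50 evaluation points
density = kde_scipy(data, grid)                       # shape (50,)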
Python gaussian_kde() usage examples (collected source snippets)
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

def plot(samples, path=None, true_value=5, title='ABC posterior'):
    Bayes_estimate = np.mean(samples[:, 0])
    theta = true_value
    # Note: xmin must be the minimum and xmax the maximum (they were swapped).
    xmin, xmax = min(samples[:, 0]), max(samples[:, 0])
    positions = np.linspace(xmin, xmax, samples.shape[0])
    gaussian_kernel = gaussian_kde(samples[:, 0])
    values = gaussian_kernel(positions)
    ymax = max(values) + .1 * (max(values) - min(values))
    plt.figure()
    plt.plot(positions, values, label='posterior density')
    plt.plot([theta, theta], [min(values), ymax], label='true value')
    plt.plot([Bayes_estimate, Bayes_estimate], [min(values), ymax], label='Bayes estimate')
    plt.ylim([min(values), ymax])
    plt.xlabel(r'$\theta$')
    plt.ylabel('density')
    plt.rc('axes', labelsize=15)
    plt.legend(loc='best', frameon=False, numpoints=1)
    font = {'size': 15}
    plt.rc('font', **font)
    plt.title(title)
    if path is not None:
        plt.savefig(path)
    return plt
import numpy as np
import scipy.stats as sst

def KDE_entropy(x):
    '''
    Estimate the entropy H(X) from samples {x_i}_{i=1}^N
    using a Kernel Density Estimator (KDE) and resubstitution.
    Input: x: 2D array of size N*d_x
    Output: scalar estimate of H(X)
    '''
    x = np.asarray(x)
    N = len(x)
    # Fit the KDE once (scipy expects a (d_x, N) array), not once per sample.
    kernel = sst.gaussian_kde(x.transpose())
    local_est = np.zeros(N)
    for i in range(N):
        local_est[i] = kernel.evaluate(x[i])
    # Resubstitution estimate: H(X) ~ -(1/N) * sum_i log p_hat(x_i)
    return -np.mean(np.log(local_est))
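A quick sanity check, as a sketch (assumes NumPy and the function above): the entropy of a standard 1-D Gaussian is 0.5 * log(2 * pi * e), about 1.419 nats, so the resubstitution estimate should land close to that.

import numpy as np

rng = np.random.RandomState(0)
x = rng.normal(size=(2000, 1))   # N=2000 samples, d_x=1
print(KDE_entropy(x))            # close to 1.419 nats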
Source: model.py, project: 5th_place_solution_facebook_check_ins (author: aikinogard)
# Shared imports for the model.py snippets below (kde_opt2c, kde_opt3c, kde_opt4).
import logging

import numpy as np
import pandas as pd
from scipy.stats import gaussian_kde

def kde_opt2c(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"] + df["hour"] / 24.
        df_new["accuracy"] = df["accuracy"].apply(lambda x: np.log10(x))
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        return df_new
    logging.info("train kde_opt2c model")
    df_cell_train_feats_kde = prepare_feats(df_cell_train_feats)
    df_cell_test_feats_kde = prepare_feats(df_cell_test_feats)
    n_class = len(np.unique(y_train))
    y_test_pred = np.zeros((len(df_cell_test_feats_kde), n_class), "d")
    for i in range(n_class):
        X = df_cell_train_feats_kde[y_train == i]
        y_test_pred_i = np.ones(len(df_cell_test_feats_kde), "d")
        for feat in df_cell_train_feats_kde.columns.values:
            X_feat = X[feat].values
            # Naive-Bayes-style product of per-feature KDEs, Scott's rule bandwidth.
            kde = gaussian_kde(X_feat, "scott")
            y_test_pred_i *= kde.evaluate(df_cell_test_feats_kde[feat].values)
        y_test_pred[:, i] += y_test_pred_i
    return y_test_pred
Source: model.py, project: 5th_place_solution_facebook_check_ins (author: aikinogard)
def kde_opt3c(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"] + df["hour"] / 24.
        df_new["accuracy"] = df["accuracy"].apply(lambda x: np.log10(x))
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        return df_new
    logging.info("train kde_opt3c model")
    df_cell_train_feats_kde = prepare_feats(df_cell_train_feats)
    df_cell_test_feats_kde = prepare_feats(df_cell_test_feats)
    n_class = len(np.unique(y_train))
    y_test_pred = np.zeros((len(df_cell_test_feats_kde), n_class), "d")
    for i in range(n_class):
        X = df_cell_train_feats_kde[y_train == i]
        y_test_pred_i = np.ones(len(df_cell_test_feats_kde), "d")
        for feat in df_cell_train_feats_kde.columns.values:
            X_feat = X[feat].values
            # Fit once with Scott's rule to obtain the bandwidth factor, then
            # refit with an empirically tuned, narrower bandwidth.
            kde = gaussian_kde(X_feat, "scott")
            kde = gaussian_kde(X_feat, kde.factor * 0.741379)
            y_test_pred_i *= kde.evaluate(df_cell_test_feats_kde[feat].values)
        y_test_pred[:, i] += y_test_pred_i
    return y_test_pred
def _scipy_bivariate_kde(x, y, bw, gridsize, cut, clip):
    """Compute a bivariate kde using scipy."""
    # `_kde_support` and `string_types` come from the surrounding module.
    data = np.c_[x, y]
    kde = stats.gaussian_kde(data.T)
    data_std = data.std(axis=0, ddof=1)
    if isinstance(bw, string_types):
        bw = "scotts" if bw == "scott" else bw
        bw_x = getattr(kde, "%s_factor" % bw)() * data_std[0]
        bw_y = getattr(kde, "%s_factor" % bw)() * data_std[1]
    elif np.isscalar(bw):
        bw_x, bw_y = bw, bw
    else:
        msg = ("Cannot specify a different bandwidth for each dimension "
               "with the scipy backend. You should install statsmodels.")
        raise ValueError(msg)
    x_support = _kde_support(data[:, 0], bw_x, gridsize, cut, clip[0])
    y_support = _kde_support(data[:, 1], bw_y, gridsize, cut, clip[1])
    xx, yy = np.meshgrid(x_support, y_support)
    z = kde([xx.ravel(), yy.ravel()]).reshape(xx.shape)
    return xx, yy, z
Source: plotting.py, project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda (author: SignalMedia)
def _plot(cls, ax, y, style=None, bw_method=None, ind=None,
          column_num=None, stacking_id=None, **kwds):
    # `remove_na`, `LooseVersion`, `warnings` and `MPLPlot` come from the
    # surrounding pandas plotting module.
    from scipy.stats import gaussian_kde
    from scipy import __version__ as spv
    y = remove_na(y)
    if LooseVersion(spv) >= '0.11.0':
        gkde = gaussian_kde(y, bw_method=bw_method)
    else:
        gkde = gaussian_kde(y)
        if bw_method is not None:
            msg = ('bw_method was added in Scipy 0.11.0.'
                   ' Scipy version in use is %s.' % spv)
            warnings.warn(msg)
    y = gkde.evaluate(ind)
    lines = MPLPlot._plot(ax, ind, y, style=style, **kwds)
    return lines
Source: rplot.py, project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda (author: SignalMedia)
import numpy as np

def work(self, fig=None, ax=None):
    """Draw a one dimensional kernel density plot.
    You can specify either a figure or an axis to draw on.

    Parameters:
    -----------
    fig: matplotlib figure object
    ax: matplotlib axis object to draw on

    Returns:
    --------
    fig, ax: matplotlib figure and axis objects
    """
    if ax is None:
        if fig is None:
            return fig, ax
        else:
            ax = fig.gca()
    from scipy.stats import gaussian_kde
    x = self.data[self.aes['x']]
    gkde = gaussian_kde(x)
    ind = np.linspace(x.min(), x.max(), 200)
    ax.plot(ind, gkde.evaluate(ind))
    return fig, ax
import sys
from scipy.stats import gaussian_kde
from scipy.optimize import minimize_scalar

def estimate_mode(acc):
    """ Estimate the mode of a set of float values between 0 and 1.

    :param acc: Data.
    :returns: The mode of the sample
    :rtype: float
    """
    # Taken from sloika.
    if len(acc) > 1:
        da = gaussian_kde(acc)
        optimization_result = minimize_scalar(lambda x: -da(x), bounds=(0, 1), method='Bounded')
        if optimization_result.success:
            try:
                mode = optimization_result.x[0]
            except IndexError:
                mode = optimization_result.x
        else:
            sys.stderr.write("Mode computation failed")
            mode = 0
    else:
        mode = acc[0]
    return mode
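A usage sketch (assumes NumPy): Beta(20, 5) samples have their mode at 19/23, about 0.826, which estimate_mode should recover approximately.

import numpy as np

rng = np.random.RandomState(0)
acc = rng.beta(20, 5, size=1000)   # accuracies concentrated near 0.83
print(estimate_mode(acc))          # roughly 0.83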
from collections import OrderedDict

import numpy as np
from scipy.stats import gaussian_kde

def density(data, nbins=500, rng=None):
    """
    Computes density for metrics which return vectors.

    Required parameters:
        data:
            - Dictionary of the vectors of data
    """
    density = OrderedDict()
    xs = OrderedDict()
    for subj in data:
        hist = np.histogram(data[subj], nbins)
        hist = np.max(hist[0])  # height of the tallest histogram bin, used for scaling
        dens = gaussian_kde(data[subj])
        if rng is not None:
            xs[subj] = np.linspace(rng[0], rng[1], nbins)
        else:
            xs[subj] = np.linspace(0, np.max(data[subj]), nbins)
        density[subj] = dens.evaluate(xs[subj]) * np.max(data[subj] * hist)
    return {"xs": xs, "pdfs": density}
import numpy as np
from sklearn.neighbors import KernelDensity
# Assumed import for this excerpt: the source project provides
# UniformIntegerHyperparameter (as in the ConfigSpace package).

def __init__(self, hyperparameter, param_name, data, oob_strategy='resample', bandwidth=0.4):
    if oob_strategy not in ['resample', 'round', 'ignore']:
        raise ValueError("oob_strategy must be one of 'resample', 'round', 'ignore'")
    self.oob_strategy = oob_strategy
    self.param_name = param_name
    self.hyperparameter = hyperparameter
    reshaped = np.reshape(data, (len(data), 1))
    if self.hyperparameter.log:
        if isinstance(self.hyperparameter, UniformIntegerHyperparameter):
            # self.probabilities = {val: self.distrib.pdf(np.log2(val)) for val in range(self.hyperparameter.lower, self.hyperparameter.upper)}
            raise ValueError('Log Integer hyperparameter not supported: %s' % param_name)
        # self.distrib = gaussian_kde(np.log2(data))
        # Fit the KDE in log2 space for log-scale hyperparameters.
        self.distrib = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(np.log2(reshaped))
    else:
        # self.distrib = gaussian_kde(data)
        self.distrib = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(reshaped)
def test_gaussian_kde(n_samples=1000):
    # Compare gaussian KDE results to scipy.stats.gaussian_kde.
    # `BallTree`, `SkipTest` and `assert_array_almost_equal` come from the
    # scikit-learn test module's imports.
    from scipy.stats import gaussian_kde
    np.random.seed(0)
    x_in = np.random.normal(0, 1, n_samples)
    x_out = np.linspace(-5, 5, 30)
    for h in [0.01, 0.1, 1]:
        bt = BallTree(x_in[:, None])
        try:
            gkde = gaussian_kde(x_in, bw_method=h / np.std(x_in))
        except TypeError:
            raise SkipTest("Old version of scipy, doesn't accept "
                           "explicit bandwidth.")
        dens_bt = bt.kernel_density(x_out[:, None], h) / n_samples
        dens_gkde = gkde.evaluate(x_out)
        assert_array_almost_equal(dens_bt, dens_gkde, decimal=3)
def test_gaussian_kde(n_samples=1000):
    # Compare gaussian KDE results to scipy.stats.gaussian_kde
    from scipy.stats import gaussian_kde
    np.random.seed(0)
    x_in = np.random.normal(0, 1, n_samples)
    x_out = np.linspace(-5, 5, 30)
    for h in [0.01, 0.1, 1]:
        kdt = KDTree(x_in[:, None])
        try:
            gkde = gaussian_kde(x_in, bw_method=h / np.std(x_in))
        except TypeError:
            raise SkipTest("Old scipy, does not accept explicit bandwidth.")
        dens_kdt = kdt.kernel_density(x_out[:, None], h) / n_samples
        dens_gkde = gkde.evaluate(x_out)
        assert_array_almost_equal(dens_kdt, dens_gkde, decimal=3)
import numpy as np
import scipy.stats as sc

def kl_divergence(p_samples, q_samples):
    # `_cached_p_pdf`, `MAX_GRID_POINTS` and `MAX_KL` are module-level
    # globals in the source file.
    # estimate densities
    # p_samples = np.nan_to_num(p_samples)
    # q_samples = np.nan_to_num(q_samples)
    if isinstance(p_samples, tuple):
        idx, p_samples = p_samples
        if idx not in _cached_p_pdf:
            _cached_p_pdf[idx] = sc.gaussian_kde(p_samples)
        p_pdf = _cached_p_pdf[idx]
    else:
        p_pdf = sc.gaussian_kde(p_samples)
    q_pdf = sc.gaussian_kde(q_samples)
    # joint support
    left = min(min(p_samples), min(q_samples))
    right = max(max(p_samples), max(q_samples))
    p_samples_num = p_samples.shape[0]
    q_samples_num = q_samples.shape[0]
    # quantise
    lin = np.linspace(left, right, min(max(p_samples_num, q_samples_num), MAX_GRID_POINTS))
    p = p_pdf.pdf(lin)
    q = q_pdf.pdf(lin)
    # KL, capped to avoid infinities from vanishing q
    kl = min(sc.entropy(p, q), MAX_KL)
    return kl
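A usage sketch (assumes NumPy and that the module globals _cached_p_pdf, MAX_GRID_POINTS and MAX_KL are defined as in the source file): for N(0, 1) against N(0.5, 1), the true KL divergence is 0.5^2 / 2 = 0.125.

import numpy as np

rng = np.random.RandomState(0)
p = rng.normal(0.0, 1.0, 5000)
q = rng.normal(0.5, 1.0, 5000)
print(kl_divergence(p, q))   # roughly 0.125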
def ci_metrics(p_samples, q_samples, alpha=.95):
    if isinstance(p_samples, tuple):
        idx, p_samples = p_samples
    # compute CIs
    p_left, p_right = _compute_ci(p_samples, alpha)
    q_left, q_right = _compute_ci(q_samples, alpha)
    precision = 0
    iou = 0
    recall = 0
    f1 = 0
    if (p_right > q_left) and (q_right > p_left):
        # intersection
        int_left = max(q_left, p_left)
        int_right = min(p_right, q_right)
        union_left = min(p_left, q_left)
        union_right = max(p_right, q_right)
        iou = (int_right - int_left) / (union_right - union_left)
        precision = (int_right - int_left) / (q_right - q_left)
        recall = (int_right - int_left) / (p_right - p_left)
        f1 = 2 * precision * recall / (precision + recall)
        # estimate densities
        # p_pdf = sc.gaussian_kde(p_samples)
        # q_pdf = sc.gaussian_kde(q_samples)
        #
        # precision = q_pdf.integrate_box(int_left, int_right) / alpha
        # recall = p_pdf.integrate_box(int_left, int_right) / alpha
    return iou, precision, recall, f1
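_compute_ci is not shown in this excerpt. A minimal sketch of what it plausibly does, assuming an equal-tailed interval taken from sample percentiles (the actual source implementation may differ):

import numpy as np

def _compute_ci(samples, alpha=.95):
    # Equal-tailed interval covering a fraction `alpha` of the sample mass (assumed behavior).
    tail = 100 * (1.0 - alpha) / 2.0
    left, right = np.percentile(samples, [tail, 100 - tail])
    return left, right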
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401, enables the 3-D projection

def visualize_distribution(data):
    """ Visualize bucket distribution, a distribution over:
    (sentence_number, question_length, sentence_length)"""
    bucket_distribution = []
    for qid, value in data.items():
        q_length = len(value['question'])
        s_number = len(value['sentences'])
        s_length = max([len(sent) for sent in value['sentences'].values()])
        bucket_distribution.append([s_number, q_length, s_length])
    distribution = np.transpose(np.array(bucket_distribution))
    kde = stats.gaussian_kde(distribution)
    density = kde(distribution)
    # Sort so that the densest points are drawn last (on top).
    idx = density.argsort()
    x = distribution[0, idx]
    y = distribution[1, idx]
    z = distribution[2, idx]
    density = density[idx]
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(x, y, z, c=density)
    ax.set_xlabel('sentence number')
    ax.set_ylabel('question length')
    ax.set_zlabel('sentence length')
    plt.show()
def fit(self, y):
    self.lb = y.min() - 1e6
    self.ub = y.max() + 1e6
    self.kde = gaussian_kde(y)
def bayes_factor(x, y, distribution='normal', num_iters=25000, inference='sampling'):
    """
    Args:
        x (array_like): sample of a treatment group
        y (array_like): sample of a control group
        distribution: name of the KPI distribution model, which assumes a
            Stan model file with the same name exists
        num_iters: number of iterations of bayes sampling
        inference: sampling or variational inference method for approximating the posterior

    Returns:
        dictionary with statistics
    """
    # `_bayes_sampling`, `get_trace_normalized_effect_size` and `HDI_from_MCMC`
    # are helpers defined elsewhere in the source module.
    traces, n_x, n_y, mu_x, mu_y = _bayes_sampling(x, y, distribution=distribution,
                                                   num_iters=num_iters, inference=inference)
    trace_normalized_effect_size = get_trace_normalized_effect_size(distribution, traces)
    trace_absolute_effect_size = traces['delta']
    kde = gaussian_kde(trace_normalized_effect_size)
    prior = cauchy.pdf(0, loc=0, scale=1)
    # Savage-Dickey density ratio: posterior density at 0 over prior density
    # at 0 gives the Bayes factor BF_01 in favour of the null.
    bf = kde.evaluate(0)[0] / prior
    stop = bf > 3 or bf < 1 / 3.
    credibleMass = 0.95  # another magic number
    leftOut = 1.0 - credibleMass
    p1 = round(leftOut / 2.0, 5)
    p2 = round(1.0 - leftOut / 2.0, 5)
    credible_interval = HDI_from_MCMC(trace_absolute_effect_size, credibleMass)
    return {'stop': bool(stop),
            'delta': float(mu_x - mu_y),
            'confidence_interval': [{'percentile': p * 100, 'value': v}
                                    for p, v in zip([p1, p2], credible_interval)],
            'treatment_sample_size': int(n_x),
            'control_sample_size': int(n_y),
            'treatment_mean': float(mu_x),
            'control_mean': float(mu_y),
            'number_of_iterations': num_iters}
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

def __plot_scatter(prl, max_points=None, fileout="PR_scatter.png", title="Precision Recall Scatter Plot"):
    """
    :param prl: list of tuples (precision, recall)
    :param max_points: max number of tuples to plot
    :param fileout: output filename
    :param title: plot title
    """
    prs = [i[0] for i in prl]
    recs = [i[1] for i in prl]
    if max_points is not None:
        prs = prs[:max_points]
        recs = recs[:max_points]
    # Colour each point by the local density of (precision, recall) pairs.
    xy = np.vstack([prs, recs])
    z = gaussian_kde(xy)(xy)
    x = np.array(prs)
    y = np.array(recs)
    base = min(z)
    rg = max(z) - base
    z = np.array(z)
    idx = z.argsort()
    x, y, z = x[idx], y[idx], (z[idx] - base) / rg
    fig, ax = plt.subplots()
    sca = ax.scatter(x, y, c=z, s=50, edgecolor='', cmap=plt.cm.jet)
    fig.colorbar(sca)
    plt.ylabel("Recall", fontsize=20, labelpad=15)
    plt.xlabel("Precision", fontsize=20)
    plt.ylim([-0.01, 1.01])
    plt.xlim([-0.01, 1.01])
    plt.title(title)
    if matplotlib.get_backend().lower() in ['agg', 'macosx']:
        fig.set_tight_layout(True)
    else:
        fig.tight_layout()
    plt.savefig("%s" % fileout)
def setHistogram(self, values=None, bins=None, use_kde=False, histogram=None):
    """ Set background histogram (or density estimation, violin plot)

    The histogram of bins is calculated from values, optionally as a
    Gaussian KDE. If histogram is provided, its values are used directly
    and other parameters are ignored.
    """
    # Qt classes (QPixmap, QSize, QPainter, QPen, Qt, QTransform), numpy (np)
    # and gaussian_kde come from the surrounding module's imports.
    if (values is None or not len(values)) and histogram is None:
        self.setPixmap(None)
        return
    if histogram is not None:
        self._histogram = hist = histogram
    else:
        if bins is None:
            bins = min(100, max(10, len(values) // 20))
        if use_kde:
            hist = gaussian_kde(values,
                                None if isinstance(use_kde, bool) else use_kde)(
                np.linspace(np.min(values), np.max(values), bins))
        else:
            hist = np.histogram(values, bins)[0]
        self._histogram = hist = hist / hist.max()
    HEIGHT = self.rect().height() / 2
    OFFSET = HEIGHT * .3
    pixmap = QPixmap(QSize(len(hist), 2 * (HEIGHT + OFFSET)))  # +1 avoids right/bottom frame border shadow
    pixmap.fill(Qt.transparent)
    painter = QPainter(pixmap)
    painter.setPen(QPen(Qt.darkGray))
    for x, value in enumerate(hist):
        painter.drawLine(x, HEIGHT * (1 - value) + OFFSET,
                         x, HEIGHT * (1 + value) + OFFSET)
    if self.orientation() != Qt.Horizontal:
        pixmap = pixmap.transformed(QTransform().rotate(-90))
    self.setPixmap(pixmap)
def make_kde(self):
    """
    Makes the kernel density estimation(s) for create_distplot().

    This is called when curve_type = 'kde' in create_distplot().

    :rtype (list) curve: list of kde representations
    """
    # ALTERNATIVE_HISTNORM is a module-level constant in the source file.
    curve = [None] * self.trace_number
    for index in range(self.trace_number):
        self.curve_x[index] = [self.start[index] +
                               x * (self.end[index] - self.start[index])
                               / 500 for x in range(500)]
        self.curve_y[index] = (scipy.stats.gaussian_kde
                               (self.hist_data[index])
                               (self.curve_x[index]))
        if self.histnorm == ALTERNATIVE_HISTNORM:
            self.curve_y[index] *= self.bin_size[index]
    for index in range(self.trace_number):
        curve[index] = dict(type='scatter',
                            x=self.curve_x[index],
                            y=self.curve_y[index],
                            xaxis='x1',
                            yaxis='y1',
                            mode='lines',
                            name=self.group_labels[index],
                            legendgroup=self.group_labels[index],
                            showlegend=False if self.show_hist else True,
                            marker=dict(color=self.colors[index]))
    return curve
import numpy as np
from scipy.stats import gaussian_kde

def getDensityKernel(x, y):
    # filter NaNs and infinite numbers
    idx = (np.isfinite(x) * np.isfinite(y))
    x = x[idx]
    y = y[idx]
    # Calculate the point density
    xy = np.vstack([x, y])
    z = gaussian_kde(xy)(xy)
    # Sort by density so the densest points come last
    idx = z.argsort()
    return x[idx], y[idx], z[idx]
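A typical use of getDensityKernel, as a sketch (assumes Matplotlib), is a density-coloured scatter plot:

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.RandomState(0)
x, y, z = getDensityKernel(rng.normal(size=2000), rng.normal(size=2000))
plt.scatter(x, y, c=z, s=10)   # densest points drawn last, on top
plt.show()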
# RADIAL PROFILES
import numpy as np

def maximize(samples):
    '''
    Use Kernel Density Estimation to find a continuous PDF
    from the discrete sampling. Maximize that distribution.

    Parameters
    ----------
    samples : list (shape = (nsamples, ndim))
        Observations drawn from the distribution which is
        going to be fit.

    Returns
    -------
    maximum : list(ndim)
        Maxima of the probability distributions along each axis.
    '''
    from scipy.optimize import minimize
    from scipy.stats import gaussian_kde as kde
    # Give the samples array the proper shape.
    samples = np.transpose(samples)
    # Estimate the continuous PDF.
    estimate = kde(samples)
    # Negate the estimate so that minimizing it maximizes the PDF;
    # index [0] makes the objective a scalar, as scipy's minimize requires.
    def PDF(x):
        return -estimate(x)[0]
    # Initial guess on maximum is made from 50th percentile of the sample.
    p0 = [np.percentile(samples[i], 50) for i in range(samples.shape[0])]
    # Calculate the maximum of the distribution.
    maximum = minimize(PDF, p0).x
    return maximum
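A quick check of maximize, as a sketch (assumes NumPy): for Gaussian samples the KDE maximum should sit near the sample mean.

import numpy as np

rng = np.random.RandomState(0)
samples = rng.multivariate_normal([1.0, -2.0], np.eye(2), size=4000)  # (nsamples, ndim)
print(maximize(samples))   # approximately [1.0, -2.0]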
Source: model.py, project: 5th_place_solution_facebook_check_ins (author: aikinogard)
def kde_opt4(df_cell_train_feats, y_train, df_cell_test_feats):
    # `kdeBGK10` is a module-level helper (not shown here) that, when it
    # succeeds, returns a (bandwidth, mesh, density) tuple; `KernelDensity`
    # comes from sklearn.neighbors.
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"] + df["hour"] / 24.
        df_new["accuracy"] = df["accuracy"].apply(lambda x: np.log10(x))
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        return df_new
    logging.info("train kde_opt4 model")
    df_cell_train_feats_kde = prepare_feats(df_cell_train_feats)
    df_cell_test_feats_kde = prepare_feats(df_cell_test_feats)
    n_class = len(np.unique(y_train))
    y_test_pred = np.zeros((len(df_cell_test_feats_kde), n_class), "d")
    for i in range(n_class):
        X = df_cell_train_feats_kde[y_train == i]
        y_test_pred_i = np.ones(len(df_cell_test_feats_kde), "d")
        for feat in df_cell_train_feats_kde.columns.values:
            X_feat = X[feat].values
            BGK10_output = kdeBGK10(X_feat)
            if BGK10_output is None:
                # Fall back to scipy with the shrunken Scott bandwidth.
                kde = gaussian_kde(X_feat, "scott")
                kde = gaussian_kde(X_feat, kde.factor * 0.741379)
                y_test_pred_i *= kde.evaluate(df_cell_test_feats_kde[feat].values)
            else:
                bandwidth, mesh, density = BGK10_output
                kde = KernelDensity(kernel='gaussian', metric='manhattan', bandwidth=bandwidth)
                kde.fit(X_feat[:, np.newaxis])
                y_test_pred_i *= np.exp(kde.score_samples(df_cell_test_feats_kde[feat].values[:, np.newaxis]))
        y_test_pred[:, i] += y_test_pred_i
    return y_test_pred
import numpy as np
from scipy import stats

def weighted_kde(Bn, conf):
    # N.B. apply WKDE function to the instance multiplied by its confidence value
    weighted_ins = np.vstack([conf[i] * B.data() for i, B in enumerate(Bn)])
    return stats.gaussian_kde(weighted_ins.T)
def _scipy_univariate_kde(data, bw, gridsize, cut, clip):
    """Compute a univariate kernel density estimate using scipy."""
    kde = stats.gaussian_kde(data, bw_method=bw)
    if isinstance(bw, string_types):
        bw = "scotts" if bw == "scott" else bw
        bw = getattr(kde, "%s_factor" % bw)() * np.std(data)
    grid = _kde_support(data, bw, gridsize, cut, clip)
    y = kde(grid)
    return grid, y
def plot_distributions(data, cmap=plt.cm.Spectral_r, percentiles=[5, 25, 50, 75, 95],
                       percentiles_colors=['gray', 'gray', 'red', 'gray', 'gray']):
    """Plots the data points colored by local density"""
    npoints, ntimes = data.shape
    for t in range(ntimes):
        cols = gaussian_kde(data[:, t])(data[:, t])
        idx = cols.argsort()
        plt.scatter(np.ones(npoints) * t, data[idx, t], c=cols[idx], s=30, edgecolor='face', cmap=cmap)
    pct = np.percentile(data, percentiles, axis=0)
    for i in range(len(percentiles)):
        #plt.plot(iqr[s0][i,:], c=plt.cm.Spectral(i/5.0), linewidth=3)
        plt.plot(pct[i, :], c=percentiles_colors[i], linewidth=2)
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st

def plot_embedding_contours(Y, contours=10, cmap=plt.cm.plasma, xmin=None, xmax=None,
                            ymin=None, ymax=None, npts=100, density=False):
    """Plot a 2d density map of the embedding Y"""
    if xmin is None:
        xmin = np.min(Y[:, 0])
    if xmax is None:
        xmax = np.max(Y[:, 0])
    if ymin is None:
        ymin = np.min(Y[:, 1])
    if ymax is None:
        ymax = np.max(Y[:, 1])
    dx = float(xmax - xmin) / npts
    dy = float(ymax - ymin) / npts  # was (xmax - xmin): a copy-paste bug
    xx, yy = np.mgrid[xmin:xmax:dx, ymin:ymax:dy]
    positions = np.vstack([xx.ravel(), yy.ravel()])
    kernel = st.gaussian_kde(Y.T)
    f = kernel(positions)
    f = np.reshape(f, xx.shape)
    ax = plt.gca()
    ax.set_xlim(xmin, xmax)
    ax.set_ylim(ymin, ymax)
    # Contourf plot
    if density:
        cfset = ax.contourf(xx, yy, f, cmap=cmap)
    ## Or kernel density estimate plot instead of the contourf plot
    #ax.imshow(np.rot90(f), cmap='Blues', extent=[xmin, xmax, ymin, ymax])
    # Contour plot
    if contours is not None:
        cset = ax.contour(xx, yy, f, contours, cmap=cmap)
    # Label plot
    #ax.clabel(cset, inline=1, fontsize=10)
    ax.set_xlabel('Y0')
    ax.set_ylabel('Y1')
import numpy as np
import matplotlib.font_manager as fm
from scipy.stats import gaussian_kde

def violin_plot(figure, X, data, colors, xlabel="", ylabel="", n_points=400,
                violin_width=None, linewidth=3, marker_size=20):
    font = fm.FontProperties(family='Trebuchet', weight='light')
    #font = fm.FontProperties(family='CenturyGothic', fname='/Library/Fonts/Microsoft/Century Gothic', weight='light')
    figure.patch.set_facecolor('white')
    axes = figure.add_subplot(111)
    if violin_width is None:
        if len(X) > 1:
            violin_width = ((np.array(X)[1:] - np.array(X)[:-1]).mean()) / 3.
        else:
            violin_width = 0.33
    for x in range(len(X)):
        color = colors[x]
        # Smooth each group's distribution with a KDE, evaluated between the
        # 1st and 99th percentiles, and scale it to the violin width.
        Y = gaussian_kde(data[x])
        D_smooth = np.linspace(np.percentile(Y.dataset, 1), np.percentile(Y.dataset, 99), n_points)
        Y_smooth = Y.evaluate(D_smooth)
        Y_smooth = violin_width * Y_smooth / Y_smooth.max()
        axes.fill_betweenx(D_smooth, X[x], X[x] + Y_smooth, facecolor=color, alpha=0.1)
        axes.fill_betweenx(D_smooth, X[x], X[x] - Y_smooth, facecolor=color, alpha=0.1)
        axes.plot(X[x] + Y_smooth, D_smooth, color=color, linewidth=linewidth, alpha=0.8)
        axes.plot(X[x] - Y_smooth, D_smooth, color=color, linewidth=linewidth, alpha=0.8)
        axes.plot([X[x] - Y_smooth[0], X[x] + Y_smooth[0]], [D_smooth[0], D_smooth[0]], color=color, linewidth=linewidth, alpha=0.8)
        axes.plot([X[x] - Y_smooth[-1], X[x] + Y_smooth[-1]], [D_smooth[-1], D_smooth[-1]], color=color, linewidth=linewidth, alpha=0.8)
        axes.plot(X[x], np.percentile(data[x], 50), 'o', markersize=marker_size, markeredgewidth=linewidth, color=color)
        axes.plot([X[x], X[x]], [np.percentile(data[x], 25), np.percentile(data[x], 75)], color=color, linewidth=2 * linewidth, alpha=0.5)
    axes.set_xlim(min(X) - 1, max(X) + 1)
    axes.set_xlabel(xlabel, fontproperties=font, size=10, style='italic')
    axes.set_xticklabels(axes.get_xticks(), fontproperties=font, size=12)
    axes.set_ylabel(ylabel, fontproperties=font, size=10, style='italic')
    axes.set_yticklabels(axes.get_yticks(), fontproperties=font, size=12)
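A usage sketch for violin_plot (assumes NumPy and Matplotlib; the group positions, data and colours here are made up for illustration):

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.RandomState(0)
fig = plt.figure()
violin_plot(fig, X=[0, 1, 2],
            data=[rng.normal(m, 1, 300) for m in (0, 1, 2)],
            colors=['r', 'g', 'b'], xlabel='group', ylabel='value')
plt.show()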