def test_megkde_2d_basic():
    # Draw from a 2D normal, fit the KDE, then check that inverse-CDF sampling
    # from the KDE's marginals recovers the input distribution.
    # (These tests assume: import numpy as np; from scipy.interpolate import interp1d;
    # from scipy.stats import norm; MegKDE appears to come from the chainconsumer package.)
    np.random.seed(1)
    data = np.random.multivariate_normal([0, 1], [[1.0, 0.], [0., 0.75 ** 2]], size=10000)
    xs, ys = np.linspace(-4, 4, 50), np.linspace(-4, 4, 50)
    xx, yy = np.meshgrid(xs, ys, indexing='ij')
    samps = np.vstack((xx.flatten(), yy.flatten())).T
    zs = MegKDE(data).evaluate(samps).reshape(xx.shape)
    zs_x = zs.sum(axis=1)  # marginalize over y
    zs_y = zs.sum(axis=0)  # marginalize over x
    cs_x = zs_x.cumsum()
    cs_x /= cs_x[-1]
    cs_x[0] = 0
    cs_y = zs_y.cumsum()
    cs_y /= cs_y[-1]
    cs_y[0] = 0
    samps_x = interp1d(cs_x, xs)(np.random.uniform(size=10000))
    samps_y = interp1d(cs_y, ys)(np.random.uniform(size=10000))
    mu_x, std_x = norm.fit(samps_x)
    mu_y, std_y = norm.fit(samps_y)
    assert np.isclose(mu_x, 0, atol=0.1)
    assert np.isclose(std_x, 1.0, atol=0.1)
    assert np.isclose(mu_y, 1, atol=0.1)
    assert np.isclose(std_y, 0.75, atol=0.1)
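All of the KDE tests here sample from a tabulated density by inverse-transform sampling: cumulative-sum the pdf values, normalize so the empirical CDF ends at 1, then interpolate the inverse CDF at uniform draws. A small helper capturing that pattern (a sketch only; the tests inline it, and the helper name is ours):

def sample_from_tabulated_pdf(xs, ys, n, seed=None):
    # Inverse-transform sampling from a density tabulated as ys over the grid xs.
    rng = np.random.default_rng(seed)
    cs = ys.cumsum()
    cs /= cs[-1]  # normalize the empirical CDF so it ends at 1
    cs[0] = 0     # pin the left edge so interp1d covers the full [0, 1] range
    return interp1d(cs, xs)(rng.uniform(size=n))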
def fit(self, X, y):
    # (Assumes: import numpy as np; from sklearn.base import clone;
    # from sklearn.model_selection import KFold.)
    self.base_models_ = [list() for _ in self.base_models]
    self.meta_model_ = clone(self.meta_model)
    kfold = KFold(n_splits=self.n_folds, shuffle=True, random_state=15)
    # Train cloned base models, then create the out-of-fold predictions
    # needed to train the cloned meta-model.
    out_of_fold_predictions = np.zeros((X.shape[0], len(self.base_models)))
    for i, model in enumerate(self.base_models):
        for train_index, holdout_index in kfold.split(X, y):
            instance = clone(model)
            self.base_models_[i].append(instance)
            instance.fit(X[train_index], y[train_index])
            y_pred = instance.predict(X[holdout_index])
            out_of_fold_predictions[holdout_index, i] = y_pred
    # Now train the cloned meta-model using the out-of-fold predictions as new features.
    self.meta_model_.fit(out_of_fold_predictions, y)
    return self
# Run all base models on the test data and use the averaged predictions as
# meta-features for the final prediction, which is made by the meta-model
# (a sketch of this predict method follows).
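The predict method the comment above describes is not included in this excerpt. A minimal sketch consistent with the fit method, assuming scikit-learn-style estimators:

def predict(self, X):
    # Average each base model's fold clones column-wise, then feed the averaged
    # columns to the meta-model as features (sketch; not part of the excerpt).
    meta_features = np.column_stack([
        np.column_stack([model.predict(X) for model in base_models]).mean(axis=1)
        for base_models in self.base_models_])
    return self.meta_model_.predict(meta_features)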
def test_megkde_1d_basic():
    # Draw from a normal, fit the KDE, and check that sampling from the KDE's pdf recovers the normal.
    np.random.seed(0)
    data = np.random.normal(loc=0, scale=1.0, size=2000)
    xs = np.linspace(-3, 3, 100)
    ys = MegKDE(data).evaluate(xs)
    cs = ys.cumsum()
    cs /= cs[-1]
    cs[0] = 0
    samps = interp1d(cs, xs)(np.random.uniform(size=10000))
    mu, std = norm.fit(samps)
    assert np.isclose(mu, 0, atol=0.1)
    assert np.isclose(std, 1.0, atol=0.1)
def test_megkde_1d_uniform_weight():
    # Same as the basic test, but passing explicit uniform weights should not change the result.
    np.random.seed(0)
    data = np.random.normal(loc=0, scale=1.0, size=2000)
    xs = np.linspace(-3, 3, 100)
    ys = MegKDE(data, weights=np.ones(2000)).evaluate(xs)
    cs = ys.cumsum()
    cs /= cs[-1]
    cs[0] = 0
    samps = interp1d(cs, xs)(np.random.uniform(size=10000))
    mu, std = norm.fit(samps)
    assert np.isclose(mu, 0, atol=0.1)
    assert np.isclose(std, 1.0, atol=0.1)
def test_megkde_1d_changing_weights():
    # Weight a uniform grid of points by the normal pdf; the weighted KDE should recover the normal.
    np.random.seed(0)
    xs = np.linspace(-3, 3, 1000)
    weights = norm.pdf(xs)
    ys = MegKDE(xs, weights=weights).evaluate(xs)
    cs = ys.cumsum()
    cs /= cs[-1]
    cs[0] = 0
    samps = interp1d(cs, xs)(np.random.uniform(size=10000))
    mu, std = norm.fit(samps)
    assert np.isclose(mu, 0, atol=0.1)
    assert np.isclose(std, 1.0, atol=0.1)
def logistic_fidelity(self):
    # Group data and assign state labels.
    gnd_features = np.hstack([np.real(self.ground_data.T),
                              np.imag(self.ground_data.T)])
    ex_features = np.hstack([np.real(self.excited_data.T),
                             np.imag(self.excited_data.T)])
    # liblinear wants arrays in C order.
    features = np.ascontiguousarray(np.vstack([gnd_features, ex_features]))
    state = np.ascontiguousarray(np.hstack([np.zeros(self.ground_data.shape[1]),
                                            np.ones(self.excited_data.shape[1])]))
    # Set up logistic regression with cross-validation using liblinear.
    # Cs sets the inverse of the regularization strength, which will be optimized
    # through cross-validation. Uses the default Stratified K-Folds
    # CV generator, with 3 folds.
    # This is set up to be as consistent with the MATLAB implementation
    # as I can make it. --GJR
    Cs = np.logspace(-1, 2, 5)
    logreg = LogisticRegressionCV(Cs, cv=3, solver='liblinear')
    logreg.fit(features, state)  # fit the model
    predictions = logreg.predict(features)  # in-sample classification
    score = logreg.score(features, state)  # mean accuracy of classification
    N = len(predictions)
    S = np.sum(predictions == state)  # how many we got right
    # Now calculate the confidence interval: quantiles of a Beta(S+1, N-S+1) posterior.
    c = 0.95
    flo = betaincinv(S + 1, N - S + 1, (1 - c) / 2.)
    fhi = betaincinv(S + 1, N - S + 1, (1 + c) / 2.)
    logger.info(("In-sample logistic regression fidelity: " +
                 "{:.2f}% ({:.2f}, {:.2f})".format(100 * score, 100 * flo, 100 * fhi)))
def _rerender(self):
    nmr_maps = len(self.maps_to_show)
    if self._show_trace:
        nmr_maps *= 2
    grid = GridSpec(nmr_maps, 1, left=0.04, right=0.96, top=0.94, bottom=0.06, hspace=0.2)
    i = 0
    for map_name in self.maps_to_show:
        samples = self._voxels[map_name]
        if self._sample_indices is not None:
            samples = samples[:, self._sample_indices]
        title = map_name
        if map_name in self.names:
            title = self.names[map_name]
        if isinstance(self._nmr_bins, dict) and map_name in self._nmr_bins:
            nmr_bins = self._nmr_bins[map_name]
        else:
            nmr_bins = self._nmr_bins
        hist_plot = plt.subplot(grid[i])
        # 'density' replaces the 'normed' keyword removed in matplotlib 3.1.
        n, bins, patches = hist_plot.hist(np.nan_to_num(samples[self.voxel_ind, :]), nmr_bins, density=True)
        plt.title(title)
        i += 1
        if self._fit_gaussian:
            mu, sigma = norm.fit(samples[self.voxel_ind, :])
            bincenters = 0.5 * (bins[1:] + bins[:-1])
            # scipy's norm.pdf replaces matplotlib's removed mlab.normpdf.
            y = norm.pdf(bincenters, mu, sigma)
            hist_plot.plot(bincenters, y, 'r', linewidth=1)
        if self._show_trace:
            trace_plot = plt.subplot(grid[i])
            trace_plot.plot(samples[self.voxel_ind, :])
            i += 1
def display_distrib(df, feature):
    # Note: the parameter was originally named `pd`, which shadows the usual
    # pandas alias; renamed to `df` for clarity.
    plt.figure()
    # seaborn's distplot is deprecated in newer versions; kept for fidelity to the original.
    sns.distplot(df[feature].dropna(), fit=norm)
    (mu, sigma) = norm.fit(df[feature].dropna())
    plt.legend([r'Normal dist. ($\mu=$ {:.2f} and $\sigma=$ {:.2f})'.format(mu, sigma)], loc='best')
    plt.ylabel('Frequency')
    plt.title('{} distribution'.format(feature))
    plt.show()
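A minimal usage sketch with synthetic data (the hardcoded 'SalePrice' title in the original suggests the Kaggle House Prices dataset, but any numeric column works):

import numpy as np
import pandas as pd

train = pd.DataFrame({'SalePrice': np.random.lognormal(mean=12, sigma=0.4, size=1000)})
display_distrib(train, 'SalePrice')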
def __init__(self, models):
    self.models = models

# We define clones of the original models to fit the data in.
def fit(self, X, y):
    self.models_ = [clone(x) for x in self.models]
    # Train the cloned base models.
    for model in self.models_:
        model.fit(X, y)
    return self
# Now we make predictions with the cloned models and average them (sketched below).
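A sketch of that predict method, which is not part of this excerpt:

def predict(self, X):
    # Stack each cloned model's predictions as a column and average across columns.
    predictions = np.column_stack([model.predict(X) for model in self.models_])
    return np.mean(predictions, axis=1)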
def __init__(self, base_models, meta_model, n_folds=5):
    self.base_models = base_models
    self.meta_model = meta_model
    self.n_folds = n_folds

# We again fit the data on clones of the original models (its fit method appears earlier in this collection).
def central_limit(rvs, n, length):
    # (Assumes something like: from scipy.stats import norm as norm_dist.)
    rv_mean = np.zeros(length)
    for i in range(n):
        rv = rvs(size=length)
        rv_mean = rv_mean + rv
    rv_mean = rv_mean / n
    gaussian_params = norm_dist.fit(rv_mean)
    gaussian = norm_dist(gaussian_params[0], gaussian_params[1])
    return rv_mean, gaussian
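Usage sketch: averaging 50 uniform draws per point should produce a fit close to the central-limit prediction (mean 0.5, std sqrt(1/12)/sqrt(50), roughly 0.041):

import numpy as np
from scipy.stats import uniform
from scipy.stats import norm as norm_dist

rv_mean, gaussian = central_limit(uniform.rvs, n=50, length=10000)
print(gaussian.mean(), gaussian.std())  # roughly 0.5 and 0.041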
def graphing(what, data, who):
    kek = data[who][what]
    fig = plt.figure()
    ax = fig.add_subplot(111)
    if '\xa0' in str(kek):
        # Strip non-breaking spaces used as thousands separators.
        kek = int(kek.replace(u'\xa0', ''))
    data = stuff[what]  # note: `stuff` is a module-level variable in the original source
    mean, sigma = norm.fit(data)
    plt.hist(data, bins=1000, density=True, alpha=0.4)  # 'density' replaces the removed 'normed'
    xmin, xmax = plt.xlim()
    x = np.linspace(xmin, xmax, 1000)
    p = norm.pdf(x, mean, sigma)
    plt.plot(x, p, 'k', linewidth=2)
    title = f'{what}\nFit results: mean = {round(mean, 2)}, sigma = {round(sigma, 2)}'
    plt.title(title)
    two, eight = norm.interval(.999, loc=mean, scale=sigma)
    plt.xlim(0, eight)
    new = norm.pdf(kek, mean, sigma)
    plt.plot([kek, kek], [0, new], color='r', linestyle='--')
    if what == 'Death/Game':
        # Lower is better for deaths, so invert the percentile.
        at = round((1 - norm.cdf(kek, mean, sigma)) * 100, 2)
    else:
        at = round(norm.cdf(kek, mean, sigma) * 100, 2)
    text = f'You are better than\n{at}% of players'
    ax.annotate(text, xy=(kek, new), xytext=(0.7, 0.8), textcoords='axes fraction',
                arrowprops=dict(facecolor='g', color='g'))
    what = what.replace('/', '-')  # '/' is not allowed in file names
    plt.savefig(f'Y:/python/graphs/data/{what}.png')
def scales(stuff):
    limits = {}
    for key in stuff.keys():
        try:
            data = stuff[key]
            mean, sigma = norm.fit(data)
            one, nine = norm.interval(.99, loc=mean, scale=sigma)
            two, eight = norm.interval(.9, loc=mean, scale=sigma)
            three, seven = norm.interval(.7, loc=mean, scale=sigma)
            four, six = norm.interval(.5, loc=mean, scale=sigma)
            five = mean
            if key != 'Karma':
                # Clamp the lower bounds so they never go negative.
                if four < 0:
                    four = five / 2
                if three < 0:
                    three = four / 2
                if two < 0:
                    two = three / 2
                if one < 0:
                    one = two / 2
            if key == 'Fleet Strength':
                if nine > 200:
                    nine = 200
            if key == 'Eff rating':
                if nine > 9000:
                    nine = 9000
            if key == 'Death/Game' or key == 'Total deaths':
                # Lower is better for deaths, so the scale is reversed.
                limits[key] = {one: 9, two: 8, three: 7, four: 6, five: 5,
                               six: 4, seven: 3, eight: 2, nine: 1}
            else:
                limits[key] = {one: 1, two: 2, three: 3, four: 4, five: 5,
                               six: 6, seven: 7, eight: 8, nine: 9}
        except Exception:
            # Skip keys whose values cannot be fit (a bare `except` in the original).
            continue
    return limits
def fit_distribution(self):
    y = np.zeros(self.num_sent)
    start = 0  # renamed from `iter`, which shadows the builtin
    for key in self.stats:
        end = self.stats[key].num_sents + start
        y[start:end] = key
        start = end
    mu, std = norm.fit(y)
    self.distr = norm(mu, std)
def __get_est_price_mode_pe(realtime, code, years):
    q_str = 'code=="' + code + '"'
    pe_obj = {}
    eps = 0
    for y in range(datetime.now().year - years, datetime.now().year + 1):
        for q in range(1, 5):
            quarter = str(y) + 'q' + str(q)
            if os.path.exists(PREFIX + '/' + quarter + '.csv'):
                r = __pd_read_report(quarter)
                if len(r.query(q_str)) > 0:
                    # Save the full P/E history and the latest EPS.
                    tmp_pe, tmp_eps = __get_pe_and_eps(code, quarter)
                    if isinstance(tmp_pe, float) and isinstance(tmp_eps, float):
                        pe_obj[quarter] = tmp_pe
                        eps = tmp_eps
                        log.debug('%s pe: %.2f, eps: %.2f', quarter, pe_obj[quarter], eps)
            else:
                log.warning('skip %s', quarter)
                continue
    arr = list(pe_obj.values())  # norm.fit needs a sequence, not a dict view
    mu, std = norm.fit(arr)
    if realtime:
        d = datetime.now()
        today = __pd_read_today_all()
        close = round(today.query(q_str).trade.values[0], 2)
    else:
        k, d = __get_k_data_of_last_trade_day(code)
        close = round(k.close.values[0], 2)
    log.info('%s price: %.2f @ pe %.2f, eps %.2f', d.strftime("%Y-%m-%d"), close, close / eps, eps)
    log.info('mu, std: %.2f, %.2f', mu, std)
    growth = __get_growth(code, years)
    left = __estimation_formula_bg_dynamic(growth, eps, mu - std)
    centrum = __estimation_formula_bg_dynamic(growth, eps, mu)
    right = __estimation_formula_bg_dynamic(growth, eps, mu + std)
    value = __estimation_formula_bg_dynamic(growth, eps, 8.5)  # 8.5 is the Graham base P/E for a no-growth firm
    log.info('est dynamic: %.2f~%.2f~%.2f', left, centrum, right)
    log.info('est value: %.2f', value)
    log.info('range from left: %.2f%%', (close - left) / left * 100.0)
    log.info('position: %.2f%%', (close - left) / (right - left) * 100.0)
    return left, centrum, right, value
def __get_est_price_mode_pb(realtime, code, years):
    q_str = 'code=="' + code + '"'
    pb_obj = {}
    bvps = 0
    for y in range(datetime.now().year - years, datetime.now().year + 1):
        for q in range(1, 5):
            quarter = str(y) + 'q' + str(q)
            if os.path.exists(PREFIX + '/' + quarter + '.csv'):
                r = __pd_read_report(quarter)
                if len(r.query(q_str)) > 0:
                    # Save the full P/B history and the latest BVPS.
                    tmp_pb, tmp_bvps = __get_pb_and_bvps(code, quarter)
                    if isinstance(tmp_pb, float) and isinstance(tmp_bvps, float):
                        pb_obj[quarter] = tmp_pb
                        bvps = tmp_bvps
                        log.debug('%s pb: %.2f, bvps: %.2f', quarter, pb_obj[quarter], bvps)
            else:
                log.warning('skip %s', quarter)
                continue
    arr = list(pb_obj.values())  # norm.fit needs a sequence, not a dict view
    mu, std = norm.fit(arr)
    if realtime:
        d = datetime.now()
        today = __pd_read_today_all()
        close = round(today.query(q_str).trade.values[0], 2)
    else:
        k, d = __get_k_data_of_last_trade_day(code)
        close = round(k.close.values[0], 2)
    log.info('%s price: %.2f @ pb %.2f, bvps %.2f', d.strftime("%Y-%m-%d"), close, close / bvps, bvps)
    log.info('mu, std: %.2f, %.2f', mu, std)
    left = __estimation_formula_pb(bvps, mu - std)
    centrum = __estimation_formula_pb(bvps, mu)
    right = __estimation_formula_pb(bvps, mu + std)
    value = __estimation_formula_pb(bvps, 1.0)  # price at book value (P/B = 1)
    log.info('est dynamic: %.2f~%.2f~%.2f', left, centrum, right)
    log.info('est value: %.2f', value)
    log.info('range from left: %.2f%%', (close - left) / left * 100.0)
    log.info('position: %.2f%%', (close - left) / (right - left) * 100.0)
    return left, centrum, right, value
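The estimation helpers are referenced but never defined in this collection. Plausible sketches, assuming "bg" stands for the Benjamin Graham growth formula (value = EPS x (base P/E + 2 x growth)) and the P/B variant is a straight multiple; both are assumptions, not the original implementations:

def __estimation_formula_bg_dynamic(growth, eps, base_pe):
    # Hypothetical Graham-style value; callers pass mu +/- std or the classic 8.5 as base_pe.
    return eps * (base_pe + 2.0 * growth)

def __estimation_formula_pb(bvps, pb):
    # Hypothetical: price implied by a target price-to-book multiple.
    return bvps * pb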
def show(self, voxel_ind=0, names=None, maps_to_show=None, to_file=None, block=True, maximize=False,
         show_trace=True, nmr_bins=20, window_title=None, show_sliders=True, fit_gaussian=True,
         figure_options=None, sample_indices=None):
    """Show the samples per voxel.

    Args:
        voxel_ind (int): the voxel to show the samples from.
        names (dict): A dictionary of names for the different maps. Use as ``{map_name: display_name}``,
            that is, the key is the name of the map in the volumes dictionary and the display name is
            the string that will be used as the title for that map.
        maps_to_show (:class:`list`): A list of maps to show.
            The items in this list must correspond to the keys in the volumes dictionary.
        to_file (string, optional, default None): If to_file is not None it is supposed
            to be a filename where the image will be saved.
            If not set to None, nothing will be displayed, the results will directly be saved.
            Already existing items will be overwritten.
        block (boolean): If we want to block after calling the plots or not. Set this to False if you
            do not want the routine to block after drawing. In doing so you manually need to block.
        maximize (boolean): if we want to display the window maximized or not
        show_trace (boolean): if we show the trace of each map or not
        nmr_bins (dict or int): either a single value or one per map name
        show_sliders (boolean): if we show the sliders or not
        fit_gaussian (boolean): if we fit and show a normal distribution (Gaussian) on the histogram or not
        window_title (str): the title of the window. If None, the default title is used
        figure_options (dict): options for the figure
        sample_indices (list): the list of sample indices to use
    """
    figure_options = figure_options or {'figsize': (18, 16)}
    self._figure = plt.figure(**figure_options)
    if names:
        self.names = names
    if maps_to_show:
        self.maps_to_show = maps_to_show
    self.voxel_ind = voxel_ind
    self._nmr_bins = nmr_bins or self._nmr_bins
    self._show_trace = show_trace
    self.show_sliders = show_sliders
    self._fit_gaussian = fit_gaussian
    self._sample_indices = sample_indices
    self._setup()
    if maximize:
        mng = plt.get_current_fig_manager()
        mng.window.showMaximized()
    if window_title:
        mng = plt.get_current_fig_manager()
        mng.canvas.set_window_title(window_title)
    if to_file:
        plt.savefig(to_file)
        plt.close()
    else:
        plt.draw()
        if block:
            plt.show(block=True)
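A hypothetical call, since the owning class is not part of this excerpt (the class and map names below are illustrative only):

# visualizer = SampleVisualizer(volumes)   # hypothetical constructor
# visualizer.show(voxel_ind=10, maps_to_show=['S0', 'FA'],
#                 nmr_bins={'S0': 50}, to_file='samples.png')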
def calibrateNoise(self):
    bg, bgstd, las, time, conc, ok = CalibrationDialog.setExt()
    # The original called _np.asarray without assigning the result, which is a
    # no-op; assign the converted arrays so the fits below receive ndarrays.
    bg = _np.asarray(bg)
    bgstd = _np.asarray(bgstd)
    las = _np.asarray(las)
    time = _np.asarray(time)
    conc = _np.asarray(conc)
    x_3d = _np.array([conc, las, time])
    p0 = [1, 1]
    fitParamsBg, fitCovariances = curve_fit(fitFuncBg, x_3d, bg, p0)
    print(' fit coefficients :\n', fitParamsBg)
    # Set the fitted values on the dialog's parameter widgets.
    self.lasercEdit.setValue(fitParamsBg[0])
    self.imagercEdit.setValue(fitParamsBg[1])
    x_3dStd = _np.array([las, time, bg])
    p0S = [1, 1, 1]
    fitParamsStd, fitCovariances = curve_fit(fitFuncStd, x_3dStd, bgstd, p0S)
    print(' fit coefficients2:\n', fitParamsStd)
    self.EquationAEdit.setValue(fitParamsStd[0])
    self.EquationBEdit.setValue(fitParamsStd[1])
    self.EquationCEdit.setValue(fitParamsStd[2])
    # Noise model working point.
    figure4 = plt.figure()
    # Background: measured vs. modelled, with the identity line for reference.
    bgmodel = fitFuncBg(x_3d, fitParamsBg[0], fitParamsBg[1])
    ax1 = figure4.add_subplot(121)
    ax1.cla()
    ax1.plot(bg, bgmodel, 'o')
    x = _np.linspace(*ax1.get_xlim())
    ax1.plot(x, x)
    ax1.set_title("Background Model:")
    # Std: the same comparison for the standard-deviation model.
    bgmodelstd = fitFuncStd(x_3dStd, fitParamsStd[0], fitParamsStd[1], fitParamsStd[2])
    ax2 = figure4.add_subplot(122)
    ax2.cla()
    ax2.plot(bgstd, bgmodelstd, 'o')
    x = _np.linspace(*ax2.get_xlim())
    ax2.plot(x, x)
    ax2.set_title("Background Model Std:")
    figure4.show()
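fitFuncBg and fitFuncStd are defined elsewhere in the original source; the sketches below are purely illustrative stand-ins whose only guaranteed property is the parameter count the curve_fit calls expect (2 and 3 respectively):

def fitFuncBg(x, a, b):
    # x = [conc, las, time]; hypothetical model: offset plus a term scaling with all three drivers.
    return a * x[0] * x[1] * x[2] + b

def fitFuncStd(x, a, b, c):
    # x = [las, time, bg]; hypothetical model: shot-noise-like term plus a linear term.
    return a * _np.sqrt(x[2]) + b * x[0] * x[1] + c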
def run(self):
    """
    NOTE: Gibbs sampling is a way of updating variables one at a time instead of all at once. This is
    beneficial in high-dimensional variable spaces because it increases the probability of accepting a
    new sample. It isn't implemented here, but it might be worth keeping in mind.
    """
    counter = 0
    while not self._check_convergence() and (counter <= self.steps or self.steps == 0):
        # Dump the chains to a temp file first, then move it into place, so an
        # interrupted write cannot corrupt the previous dump.
        tmp_dump = self.dumpfile + ".temp"
        with open(tmp_dump, "wb") as ofile:
            dump_obj = [chain._dump_obj() for chain in self.chains]
            dill.dump(dump_obj, ofile, protocol=-1)
        shutil.move(tmp_dump, self.dumpfile)
        counter += 1
        child_list = OrderedDict()
        for chain in self.chains:  # Note that this will spin off (c * w) new processes, where c=chains, w=walkers
            for walker in chain.walkers:
                # Start a new process.
                func_args = []
                for variable in walker.variables:
                    if walker.lava:
                        variable.draw_random()
                    else:
                        variable.draw_new_value(walker.heat)
                    func_args.append(variable.draw_value)
                # Always add a new seed for the target function.
                func_args.append(self.rand_gen.randint(1, 999999999999999))
                p = Process(target=self.mc_step_run, args=(walker, [func_args]))
                p.start()
                child_list[walker.name] = p
        # Wait for the remaining processes to complete. Deleting inside the loop is
        # safe because we break out and restart the iteration after each removal.
        while len(child_list) > 0:
            for _name, child in child_list.items():
                if child.is_alive():
                    continue
                else:
                    del child_list[_name]
                    break
        for chain in self.chains:
            # Get the standard deviation among all historical walker scores for this chain.
            history_series = pd.Series([score for walker in chain.walkers for score in walker.score_history])
            mu, std = norm.fit(history_series)
            for walker in chain.walkers:
                self.step_parse(walker, std)
                if self.best["score"] is None or walker.current_score > self.best["score"]:
                    self.best["score"] = walker.current_score
                    self.best["variables"] = OrderedDict([(x.name, x.current_value) for x in walker.variables])
        for chain in self.chains:
            chain.swap_hot_cold()
            # Send output to files.
            if counter % self.sample_rate == 0:
                chain.step_counter += 1
                chain.write_sample()
    return
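The docstring mentions Gibbs sampling without implementing it. A minimal sketch of what a Gibbs-style step could look like against this file's walker/variable API (illustrative only; the helper name is ours):

import random

def gibbs_style_step(walker, heat):
    # Update a single randomly chosen variable; all others keep their current values.
    variable = random.choice(walker.variables)
    variable.draw_new_value(heat)
    return [v.draw_value for v in walker.variables]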