def prepare_matplotlib_data(self, data_dict):
    """Summarise every series in ``data_dict`` for plotting.

    Parameters
    ----------
    data_dict : dict
        Maps an x-axis key to a sequence of numeric samples.

    Returns
    -------
    tuple
        ``(x, data_means, data_sds, data_sems)``: ``x`` is the sorted list
        of keys; the other three lists are aligned with ``x`` and hold
        ``np.mean``, ``np.std`` and ``ss.sem`` of each series.
    """
    # Sorting the dict directly replaces the old ``data_dict.keys()[0]`` type
    # sniffing, which broke on Python 3 (dict views are not subscriptable and
    # ``unicode`` no longer exists), raised IndexError on an empty dict and
    # left ``x`` unbound for key types other than int/str.  The resulting
    # order for int and str keys is unchanged.
    x = sorted(data_dict)

    data_means = [np.mean(data_dict[key]) for key in x]
    data_sds = [np.std(data_dict[key]) for key in x]
    data_sems = [ss.sem(data_dict[key]) for key in x]
    return x, data_means, data_sds, data_sems
python类sem()的实例源码
def test_t_dist(self):
    """Compare the bootstrap interval of the mean with the Student-t interval.

    For several sample sizes a normal sample is drawn, and each bootstrap
    bound must lie within 1% (``delta = bound / 100``) of the matching
    ``st.t.interval`` bound.
    """
    mean = 100
    stdev = 100
    alpha = 0.05
    for n_samples in (250, 500, 1000, 2500, 3500, 5000, 8000, 10000):
        samples = np.random.normal(loc=mean, scale=stdev, size=n_samples)
        bsr = bs.bootstrap(samples, stat_func=bs_stats.mean, alpha=alpha)
        t_bounds = st.t.interval(1 - alpha, len(samples) - 1,
                                 loc=np.mean(samples),
                                 scale=st.sem(samples))
        # Lower bound first, then upper bound, as two separate assertions.
        for attr, reference in zip(("lower_bound", "upper_bound"), t_bounds):
            self.assertAlmostEqual(
                getattr(bsr, attr),
                reference,
                delta=reference / 100.
            )
def tearDown(self):
    """Integrate ``self.DDE`` further and check the averaged ``lyap`` values.

    Collects the second and third items returned by ``self.DDE.integrate``
    over 100 time points, discards the first 40 samples, and asserts that
    the weighted average of each column is within three standard errors of
    the corresponding entry of ``lyap_controls``.
    """
    t_begin = self.DDE.t
    t_end = self.DDE.t + 1000

    exponent_rows = []
    weights = []
    for T in np.arange(t_begin, t_end, 10):
        _, row, weight = self.DDE.integrate(T)
        exponent_rows.append(row)
        weights.append(weight)

    # Drop the leading samples before averaging.
    skip = 40
    tail = np.vstack(exponent_rows)[skip:]
    tail_weights = weights[skip:]

    for column, expected in enumerate(lyap_controls):
        samples = tail[:, column]
        lyap = np.average(samples, weights=tail_weights)
        stderr = sem(samples)
        print(lyap,stderr)
        self.assertAlmostEqual(expected, lyap, delta=3*stderr)
sklearn_data.py 文件源码
项目:-Classification-on-Chinese-Magazine-
作者: lixiaosi33
项目源码
文件源码
阅读 29
收藏 0
点赞 0
评论 0
def evaluate_cross_validation(clf, X, y, K):
    """Print the K-fold cross-validation scores of ``clf`` and their mean.

    Parameters
    ----------
    clf : estimator
        Passed unchanged to ``cross_val_score``.
    X, y : array-like
        Samples and targets; ``len(y)`` sets the number of samples.
    K : int
        Number of folds.

    The function prints the per-fold scores followed by
    ``"Mean score: <mean> (+/-<sem>)"`` and returns ``None``.
    """
    # Create a shuffled K-fold cross-validation iterator.
    # NOTE(review): the positional (n, n_folds) form matches the legacy
    # ``sklearn.cross_validation.KFold`` signature - confirm which KFold this
    # file imports.
    cv = KFold(len(y), K, shuffle=True, random_state=0)
    # With no ``scoring`` argument, cross_val_score presumably uses the
    # estimator's own ``score`` method (accuracy for classifiers).
    scores = cross_val_score(clf, X, y, cv=cv)
    # Function-call form prints the same text on Python 2 and, unlike the
    # old ``print (...).format(...)`` statement, also works on Python 3.
    print(scores)
    print("Mean score: {0:.3f} (+/-{1:.3f})".format(
        np.mean(scores), sem(scores)))
def sem(self) -> float:
    """Return the standard error of the mean of ``self.array``.

    Computed by ``st.sem`` (standard deviation / sqrt(observations)).
    """
    observations = self.array
    return st.sem(observations)
def std_error_mean(self) -> float:
    """Return the standard error of the mean of ``self.array``.

    Computed by ``st.sem`` (standard deviation / sqrt(observations)).
    """
    standard_error = st.sem(self.array)
    return standard_error
def conf(x, width=0.95, axis=None):
    """Return the Student-t confidence interval of the mean of ``x``.

    Parameters
    ----------
    x : ndarray
        Input samples.
    width : float
        Confidence level handed to ``spstats.t.interval``.
    axis : int or None
        ``None`` flattens ``x`` and returns one ``(low, high)`` pair;
        otherwise the interval is computed along ``axis`` with
        ``np.apply_along_axis``.
    """
    def _t_interval(sample):
        dof = len(sample) - 1
        centre = np.mean(sample)
        return spstats.t.interval(width, dof,
                                  loc=centre, scale=spstats.sem(sample))

    if axis is None:
        return _t_interval(x.ravel())
    return np.apply_along_axis(_t_interval, axis, x)
def plot_data(self, series):
    """Plot one latency series with error bars, save it as a PNG and show it.

    ``series`` selects the y-axis label ("mean", "sd" or "sem"; any other
    value leaves the label as ``None``) and is also used as the prefix of
    the output file name.
    """
    fig, ax1 = plt.subplots(1, 1, sharex=True, sharey=False, figsize=(5.0, 4.0))

    # X ticks come from the keys stored under self.data["10"].
    data_xtick_labels = self.data["10"].keys()
    data_xticks = [int(label) for label in data_xtick_labels]

    label_by_series = (
        ("mean", "Mean Latency"),
        ("sd", "Standard Deviation of Latency"),
        ("sem", "Standard Error of Mean of Latency"),
    )
    ylabel = next((text for key, text in label_by_series if series == key), None)

    self.plot_lines_with_error_bars(series,
                                    ax1,
                                    "Per Link Latency",
                                    ylabel,
                                    "",
                                    y_scale='linear',
                                    x_min_factor=0.75,
                                    x_max_factor=1.1,
                                    y_min_factor=0.9,
                                    y_max_factor=1,
                                    xticks=data_xticks,
                                    xtick_labels=data_xtick_labels)

    plt.setp(ax1.get_xticklabels(), rotation=0, fontsize=10)

    # Shrink the axes to 70% of their height and lift the bottom edge by 30%,
    # leaving room underneath for the legend anchored below the plot.
    box = ax1.get_position()
    new_bottom = box.y0 + box.height * 0.3
    ax1.set_position([box.x0, new_bottom, box.width, box.height * 0.7])

    handles, labels = ax1.get_legend_handles_labels()
    ax1.legend(handles, labels, shadow=True, fontsize=10, loc='upper center', ncol=2, markerscale=1.0,
               frameon=True, fancybox=True, columnspacing=0.5, bbox_to_anchor=[0.5, -0.25])

    output_name = series + "_latency_evaluation_" + self.evaluation_type + ".png"
    plt.savefig(output_name, dpi=1000)
    plt.show()
def main():
    """Collect the replay latency data and plot the mean, sd and sem series."""
    evaluation_type = "replay"

    # Delays (in milliseconds) on the links; a full sweep would be
    # [5, 10, 15, 20, 25].
    link_latencies = [0]
    # Amount of background 'load' (background emulation threads); a full
    # sweep would be [0, 10, 20, 30, 40].
    background_specs = [0]

    # The project root is everything up to and including 'NetPower_TestBed'
    # in this script's directory.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    root_end = script_dir.index('NetPower_TestBed')
    base_dir = script_dir[:root_end] + "NetPower_TestBed"
    bro_dnp3_parser_dir = base_dir + "/dnp3_timing/dnp3_parser_bro/"

    # Alternative install locations used previously:
    #   /usr/local/bro/share/bro/policy/tuning/json-logs.bro
    #   /usr/local/bro/bin/bro
    bro_json_log_conf = "/home/rakesh/bro/scripts/policy/tuning/json-logs.bro"
    bro_cmd = "/usr/bin/bro"

    processor = PCAPPostProcessing(base_dir, bro_cmd, bro_json_log_conf, bro_dnp3_parser_dir,
                                   link_latencies, background_specs, evaluation_type)
    processor.collect_data()

    for series in ("mean", "sd", "sem"):
        processor.plot_data(series)
def tearDown(self):
    """Integrate ``self.ODE`` over a long run and check the averaged output.

    Stacks the second item returned by ``self.ODE.integrate`` for every
    time in ``range(10, 100000, 10)``, discards the first 1000 rows, and
    asserts that the largest standard error stays below 0.003 and that each
    averaged component exceeds its ``lyaps`` reference by less than three
    standard errors.
    """
    self.initialise_integrator()
    times = range(10,100000,10)
    # np.vstack requires a real sequence: handing it a generator has been
    # deprecated since NumPy 1.16 and is rejected by newer releases.
    data = np.vstack([self.ODE.integrate(time)[1] for time in times])
    result = np.average(data[1000:], axis=0)
    margin = standard_error(data[1000:], axis=0)
    print(data,result,margin)
    self.assertLess( np.max(margin), 0.003 )
    for i in range(self.n):
        # NOTE(review): this check is one-sided - a result far *below*
        # lyaps[i] still passes.  Confirm whether abs() was intended.
        self.assertLess( result[i]-lyaps[i], 3*margin[i] )
test_groupby.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 28
收藏 0
点赞 0
评论 0
def test_aggregate_str_func(self):
    """Aggregating by function *name* must match calling the method directly.

    Checked for a single series, a whole grouped frame, and an ordered
    per-column mapping, on ``self.tsframe`` grouped by weekday and by
    (month, weekday).
    """
    def _check_results(grouped):
        # single series
        assert_series_equal(grouped['A'].agg('std'), grouped['A'].std())

        # group frame by function name
        assert_frame_equal(grouped.aggregate('var'), grouped.var())

        # group frame by function dict
        result = grouped.agg(OrderedDict([['A', 'var'], ['B', 'std'],
                                          ['C', 'mean'], ['D', 'sem']]))
        per_column = OrderedDict()
        per_column['A'] = grouped['A'].var()
        per_column['B'] = grouped['B'].std()
        per_column['C'] = grouped['C'].mean()
        per_column['D'] = grouped['D'].sem()
        assert_frame_equal(result, DataFrame(per_column))

    groupers = (
        lambda x: x.weekday(),
        [lambda x: x.month, lambda x: x.weekday()],
    )
    for grouper in groupers:
        _check_results(self.tsframe.groupby(grouper))
test_groupby.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 23
收藏 0
点赞 0
评论 0
def test_tab_completion(self):
    """The public names exposed by ``dir()`` on a groupby must match exactly."""
    grp = self.mframe.groupby(level='second')
    results = {name for name in dir(grp) if not name.startswith('_')}
    expected = {
        'A', 'B', 'C', 'agg', 'aggregate', 'apply', 'boxplot', 'filter',
        'first', 'get_group', 'groups', 'hist', 'indices', 'last', 'max',
        'mean', 'median', 'min', 'name', 'ngroups', 'nth', 'ohlc', 'plot',
        'prod', 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count',
        'head', 'irow', 'describe', 'cummax', 'quantile', 'rank',
        'cumprod', 'tail', 'resample', 'cummin', 'fillna', 'cumsum',
        'cumcount', 'all', 'shift', 'skew', 'bfill', 'ffill', 'take',
        'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', 'cov',
        'dtypes', 'ndim', 'diff', 'idxmax', 'idxmin',
        'pad', 'backfill',
    }
    self.assertEqual(results, expected)
test_groupby.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 29
收藏 0
点赞 0
评论 0
def test_ops_general(self):
    """Each named groupby reduction must equal ``agg`` with the matching function.

    ``sem`` is only included when ``scipy.stats`` can be imported.
    """
    named_ops = [
        ('mean', np.mean),
        ('median', np.median),
        ('std', np.std),
        ('var', np.var),
        ('sum', np.sum),
        ('prod', np.prod),
        ('min', np.min),
        ('max', np.max),
        ('first', lambda x: x.iloc[0]),
        ('last', lambda x: x.iloc[-1]),
        ('count', np.size),
    ]

    try:
        from scipy.stats import sem
    except ImportError:
        pass
    else:
        named_ops.append(('sem', sem))

    df = DataFrame(np.random.randn(1000))
    labels = np.random.randint(0, 50, size=1000).astype(float)

    for op, targop in named_ops:
        by_method = getattr(df.groupby(labels), op)().astype(float)
        by_agg = df.groupby(labels).agg(targop)
        try:
            tm.assert_frame_equal(by_method, by_agg)
        except BaseException as exc:
            # Tag the failure with the operation that produced it.
            exc.args += ('operation: %s' % op, )
            raise
test_nanops.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 22
收藏 0
点赞 0
评论 0
def test_nansem(self):
    """Check ``nanops.nansem`` against ``scipy.stats.sem`` via ``check_funs_ddof``."""
    # The skip guards must run before scipy is imported below.
    tm.skip_if_no_package('scipy.stats')
    tm._skip_if_scipy_0_17()
    from scipy.stats import sem as scipy_sem

    allowed = dict(allow_complex=False,
                   allow_str=False,
                   allow_date=False,
                   allow_tdelta=True,
                   allow_obj='convert')
    self.check_funs_ddof(nanops.nansem, scipy_sem, **allowed)
def mean_confidence_interval(data, confidence=0.95):
    """Return ``(mean, lower, upper)`` for the Student-t interval of the mean.

    The interval comes from ``st.t.interval`` with ``len(data) - 1`` degrees
    of freedom, centred on the mean and scaled by ``st.sem(data)``.
    """
    mean = np.mean(data)
    dof = len(data) - 1
    standard_error = st.sem(data)
    lower, upper = st.t.interval(confidence, dof, loc=mean, scale=standard_error)
    return mean, lower, upper
def mean_confidence_interval(data, confidence=0.95):
    """Return the half-width of the Student-t confidence interval of the mean.

    Parameters
    ----------
    data : sequence of numbers
        Sample values; converted to a float array.
    confidence : float
        Two-sided confidence level.

    Returns
    -------
    float
        ``sem(data) * t_ppf((1 + confidence) / 2, n - 1)``, i.e. the
        distance from the sample mean to either interval bound.
    """
    a = 1.0*np.array(data)
    n = len(a)
    se = st.sem(a)
    # Use the public ``ppf``: the private ``_ppf`` skips argument validation
    # and is not part of SciPy's supported API.
    h = se * st.t.ppf((1+confidence)/2., n-1)
    return h
def plot_cluster_error(ax):
    """Draw the mean normalized error (with SEM band) for both experiments.

    For "dots" and "sticks", each subject's ``res.real`` is divided by the
    mean of ``res.null``; the across-subject mean is plotted against
    ``res.steps`` with a +/- SEM band, then the axes are decorated.
    """
    path_template = "spatial_analysis/{}_{}_ifs.pkz"

    for exp in ("dots", "sticks"):
        subjects = get_subject_order(exp)
        color = get_colormap(exp, as_cmap=False)[20]

        normalized = []
        for subj in subjects:
            res = moss.load_pkl(path_template.format(subj, exp))
            # The x axis is taken from the last subject loaded.
            x = res.steps
            null_mean = res.null.mean()
            normalized.append(res.real / null_mean)

        err_matrix = np.vstack(normalized)
        center = err_matrix.mean(axis=0)
        ax.plot(x, center, color=color, lw=2)

        spread = stats.sem(err_matrix, axis=0)
        ax.fill_between(x, center - spread, center + spread,
                        alpha=.2, color=color)

    # Dashed reference line at a normalized error of 1.
    ax.axhline(y=1, lw=1, dashes=[5, 2],
               color=".5", zorder=0,
               xmin=.02, xmax=.98)

    ax.set(xlim=(0, 42),
           ylim=(.55, 1.45),
           yticks=[.6, .8, 1, 1.2, 1.4],
           xticks=[0, 10, 20, 30, 40],
           xlabel="Neighborhood radius (mm)",
           ylabel="Normalized error")

    sns.despine(ax=ax, trim=True)
plot-semi-supervised-grid-time.py 文件源码
项目:master-thesis
作者: AndreasMadsen
项目源码
文件源码
阅读 25
收藏 0
点赞 0
评论 0
def ci(x):
    """Return the upper bound of the 95% Student-t interval centred on zero.

    Uses ``x.count() - 1`` degrees of freedom and ``st.sem(x)`` as scale,
    so the result is the half-width of the confidence interval.
    """
    dof = x.count() - 1
    bounds = st.t.interval(0.95, dof, loc=0, scale=st.sem(x))
    return bounds[1]
def ci(x):
    """Return the half-width of the 95% Student-t confidence interval of ``x``.

    This is the upper bound of ``st.t.interval`` centred on zero, with
    ``x.count() - 1`` degrees of freedom and ``st.sem(x)`` as the scale.
    """
    standard_error = st.sem(x)
    _, upper = st.t.interval(0.95, x.count()-1, loc=0, scale=standard_error)
    return upper
def plot_return(agent, returns, data=None):
    ''' Plot return over time.

    ``returns`` is drawn with ``agent.colour`` and labelled with
    ``agent.legend``.  When ``data`` is given, an error bar of
    ``t.sem(data[:, k])`` is added at every 5000th index ``k``.
    '''
    plt.plot(returns, agent.colour, label=agent.legend)
    interval = 5000
    # ``is not None``: ``data != None`` on an array compares elementwise and
    # raises when used as a condition.
    if data is not None:
        # Floor division keeps the bound an int on Python 3 as well.
        for i in range(returns.size // interval):
            offset = i*interval
            plt.errorbar(1+offset, returns[offset],
                         yerr=t.sem(data[:, offset]), fmt=agent.colour)
    plt.axis([0, returns.size, 0.0, 0.8])
    plt.xlabel('Episodes')
    plt.title('Average Return')
    plt.ylabel('Average Return')
def ax_plot_lines(ax, xs, ys, colors, shapes, linestyles,
                  errorbar=False, linewidth=LINEWIDTH):
    """Draw one line per ``(x, y, color, marker, linestyle)`` tuple on ``ax``.

    With ``errorbar`` set, every ``y`` must be a list of lists: the mean of
    each inner list is plotted with ``ss.sem`` as the error bar.  Returns
    the list of objects produced by ``ax.plot`` / ``ax.errorbar``.
    """
    handles = []
    for x, y, color, marker, style in zip(xs, ys, colors, shapes, linestyles):
        if not errorbar:
            handle, = ax.plot(x, y, color=color, marker=marker,
                              linestyle=style, linewidth=linewidth)
        else:
            # y is a list of sample lists in this case
            mean = [np.mean(samples) for samples in y]
            error = [ss.sem(samples) for samples in y]
            handle = ax.errorbar(x, mean, yerr=error, color=color,
                                 marker=marker, linestyle=style, ecolor=color)
        handles.append(handle)
    return handles
def get_relation_strength(table_file, top=10, normalize=False,
                          return_sem=False, return_all=False):
    """Summarise the absolute ``combined_score`` of the first ``top`` pairs per type.

    Returns, in order of precedence:
    ``return_all``  -> ``(scores, mean, sem)`` dicts;
    ``return_sem``  -> ``(mean, sem)`` dicts;
    ``normalize``   -> mean dict divided by its largest value;
    otherwise       -> the mean dict.
    """
    type_list = load_all_pairs(table_file)

    scores = {}
    for k in type_list:
        scores[k] = [abs(v.combined_score) for v in type_list[k][:top]]
    mean = {k: np.mean(scores[k]) for k in type_list}

    def _sem_by_type():
        return {k: ss.sem(scores[k]) for k in type_list}

    if return_all:
        return scores, mean, _sem_by_type()
    if return_sem:
        return mean, _sem_by_type()
    if normalize:
        max_v = max(mean.values())
        return {k: mean[k] / max_v for k in mean}
    return mean
def make_probes_ba_traj_fig(models1, models2=None, palette=None):  # TODO ylim
    """
    Returns fig showing trajectory of probes balanced accuracy

    One line (mean across models) with a +/- SEM band is drawn per model
    group; ``models2`` adds an optional second group.  ``palette``, when
    given, is an iterator that supplies one line color per group; otherwise
    lines are black.
    """
    start = time.time()
    sns.set_style('white')
    # load data
    xys = []
    model_groups = [models1] if models2 is None else [models1, models2]
    for models in model_groups:
        model_probes_ba_trajs = [model.get_traj('probes_ba') for model in models]
        x = models[0].get_data_step_axis()
        # every trajectory is truncated to the length of the step axis
        traj_mat = np.asarray([traj[:len(x)] for traj in model_probes_ba_trajs])
        y = np.mean(traj_mat, axis=0)
        sem = [stats.sem(model_probes_bas) for model_probes_bas in traj_mat.T]
        xys.append((x, y, sem))
    # fig
    fig, ax = plt.subplots(figsize=(FigsConfigs.MAX_FIG_WIDTH, 3))
    ax.set_ylim([50, 75])
    ax.set_xlabel('Mini Batch', fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
    ax.set_ylabel('Probes Balanced Accuracy', fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    # Real booleans: the string 'off' is truthy, so Matplotlib releases that
    # no longer translate it would switch these ticks *on*.
    ax.tick_params(axis='both', which='both', top=False, right=False)
    ax.xaxis.set_major_formatter(FuncFormatter(human_format))
    ax.yaxis.grid(True)
    # plot
    for (x, y, sem) in xys:
        color = next(palette) if palette is not None else 'black'
        ax.plot(x, y, '-', linewidth=FigsConfigs.LINEWIDTH, color=color)
        ax.fill_between(x, np.add(y, sem), np.subtract(y, sem), alpha=FigsConfigs.FILL_ALPHA, color='grey')
    plt.tight_layout()
    print('{} completed in {:.1f} secs'.format(sys._getframe().f_code.co_name, time.time() - start))
    return fig
def display_scores(params, scores, append_star=False):
    """Return ``"<k=v, ...>:\\t<mean> (+/-<sem>)"`` for one parameter setting.

    ``params`` is a mapping rendered as comma-separated ``key=value``
    pairs; ``" *"`` is appended when ``append_star`` is true.
    """
    rendered = ["{0}={1}".format(k, v) for k, v in params.items()]
    line = "{0}:\t{1:.3f} (+/-{2:.3f})".format(
        ", ".join(rendered), np.mean(scores), sem(scores))
    return (line + " *") if append_star else line
def display_grid_scores(grid_scores, top=None):
    """Print one formatted line per grid point, best mean score first.

    ``grid_scores`` holds ``(params, mean_score, scores)`` triples; ``top``
    limits the report to the best ``top`` entries.  Entries whose mean plus
    two standard errors exceeds the best mean minus two standard errors
    are starred.
    """
    # Slicing with ``top=None`` keeps the whole ranking.
    ranked = sorted(grid_scores, key=lambda x: x[1], reverse=True)[:top]

    # Threshold for starring models whose standard-error bands overlap
    # the best one.
    _, best_mean, best_scores = ranked[0]
    threshold = best_mean - 2 * sem(best_scores)

    for params, mean_score, scores in ranked:
        overlaps_best = mean_score + 2 * sem(scores) > threshold
        print(display_scores(params, scores, append_star=overlaps_best))
def _signal_to_noise(a, axis):
    """Return mean / standard deviation along ``axis`` (0 where the deviation is 0)."""
    a = np.asanyarray(a)
    m = a.mean(axis)
    sd = a.std(axis=axis, ddof=0)
    return np.where(sd == 0, 0, m / sd)


def create_scipy_features(base_features, sentinel):
    r"""Calculate the skew, kurtosis, and other statistical features
    for each row.

    Parameters
    ----------
    base_features : numpy array
        The base features; every statistic is computed along ``axis=1``.
    sentinel : float
        The number passed to ``impute_values`` to replace NaN values.

    Returns
    -------
    sp_features : numpy array
        The calculated SciPy features, one column per statistic, after
        imputation and ``StandardScaler`` scaling.

    """

    logger.info("Creating SciPy Features")

    # Generate scipy features

    logger.info("SciPy Feature: geometric mean")
    row_gmean = sps.gmean(base_features, axis=1)
    logger.info("SciPy Feature: kurtosis")
    row_kurtosis = sps.kurtosis(base_features, axis=1)
    # The *test functions return (statistic, p-value); only the statistic
    # is kept as a feature.
    logger.info("SciPy Feature: kurtosis test")
    row_ktest, _ = sps.kurtosistest(base_features, axis=1)
    logger.info("SciPy Feature: normal test")
    row_normal, _ = sps.normaltest(base_features, axis=1)
    logger.info("SciPy Feature: skew")
    row_skew = sps.skew(base_features, axis=1)
    logger.info("SciPy Feature: skew test")
    row_stest, _ = sps.skewtest(base_features, axis=1)
    logger.info("SciPy Feature: variation")
    row_var = sps.variation(base_features, axis=1)
    logger.info("SciPy Feature: signal-to-noise ratio")
    # ``scipy.stats.signaltonoise`` was removed from SciPy, so the same
    # mean / std ratio is computed locally.
    row_stn = _signal_to_noise(base_features, axis=1)
    logger.info("SciPy Feature: standard error of mean")
    row_sem = sps.sem(base_features, axis=1)

    sp_features = np.column_stack((row_gmean, row_kurtosis, row_ktest,
                                   row_normal, row_skew, row_stest,
                                   row_var, row_stn, row_sem))
    sp_features = impute_values(sp_features, 'float64', sentinel)
    sp_features = StandardScaler().fit_transform(sp_features)

    # Return new SciPy features

    logger.info("SciPy Feature Count : %d", sp_features.shape[1])
    return sp_features
#
# Function create_clusters
#
test_groupby.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 27
收藏 0
点赞 0
评论 0
def test_cythonized_aggers(self):
    """Grouped reductions must equal the same reduction applied per group.

    Each reduction name is checked on a frame containing NaNs, grouped by
    one column and by two columns.
    """
    data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan],
            'B': ['A', 'B'] * 6,
            'C': np.random.randn(12)}
    df = DataFrame(data)
    df.loc[2:10:2, 'C'] = nan

    def _testit(name):
        def op(obj):
            return getattr(obj, name)()

        # single column
        grouped = df.drop(['B'], axis=1).groupby('A')
        per_group = {cat: op(group['C']) for cat, group in grouped}
        exp = DataFrame({'C': per_group})
        exp.index.name = 'A'

        assert_frame_equal(op(grouped), exp)

        # multiple columns
        grouped = df.groupby(['A', 'B'])
        nested = {}
        for (cat1, cat2), group in grouped:
            nested.setdefault(cat1, {})[cat2] = op(group['C'])
        exp = DataFrame(nested).T.stack(dropna=False)
        exp.index.names = ['A', 'B']
        exp.name = 'C'

        result = op(grouped)['C']
        if not tm._incompat_bottleneck_version(name):
            assert_series_equal(result, exp)

    for name in ('count', 'sum', 'std', 'var', 'sem',
                 'mean', 'median', 'prod', 'min', 'max'):
        _testit(name)
def make_test_and_train_pp_traj_fig(models1, models2=None, palette=None, ):
    """
    Returns fig showing trajectory of test and train perplexity

    For each model group the across-model mean of the 'test_pp' (solid) and
    'train_pp' (dashed) trajectories is drawn with a +/- SEM band.
    ``models2`` adds an optional second group; ``palette``, when given, is
    an iterator supplying one line color per group (black otherwise).
    """
    start = time.time()
    sns.set_style('white')
    # load data
    xys = []
    model_groups = [models1] if models2 is None else [models1, models2]
    for models in model_groups:
        model_test_pp_trajs = []
        model_train_pp_trajs = []
        for model in models:
            model_test_pp_trajs.append(model.get_traj('test_pp'))
            model_train_pp_trajs.append(model.get_traj('train_pp'))
        x = models[0].get_data_step_axis()
        # every trajectory is truncated to the length of the step axis
        traj_mat1 = np.asarray([traj[:len(x)] for traj in model_test_pp_trajs])
        traj_mat2 = np.asarray([traj[:len(x)] for traj in model_train_pp_trajs])
        y1 = np.mean(traj_mat1, axis=0)
        y2 = np.mean(traj_mat2, axis=0)
        sem1 = [stats.sem(row) for row in traj_mat1.T]
        sem2 = [stats.sem(row) for row in traj_mat2.T]
        xys.append((x, y1, y2, sem1, sem2))
    # fig
    fig, ax = plt.subplots(figsize=(FigsConfigs.MAX_FIG_WIDTH, 3))
    ax.set_ylim([0, models1[0].terms.num_set_])
    ax.set_ylabel('Perplexity', fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    # Real booleans: the string 'off' is truthy, so Matplotlib releases that
    # no longer translate it would switch these ticks *on*.
    ax.tick_params(axis='both', which='both', top=False, right=False)
    ax.set_xlabel('Mini Batch', fontsize=FigsConfigs.AXLABEL_FONT_SIZE)
    ax.xaxis.set_major_formatter(FuncFormatter(human_format))
    ax.yaxis.grid(True)
    # plot
    for (x, y1, y2, sem1, sem2) in xys:
        color = next(palette) if palette is not None else 'black'
        # Line style is given once via ``linestyle``; the former extra '-'
        # format string duplicated (or contradicted) that keyword.
        ax.plot(x, y1, linewidth=FigsConfigs.LINEWIDTH, color=color, linestyle='-', label='Test')
        ax.plot(x, y2, linewidth=FigsConfigs.LINEWIDTH, color=color, linestyle='--', label='Train')
        ax.fill_between(x, np.add(y1, sem1), np.subtract(y1, sem1), alpha=FigsConfigs.FILL_ALPHA, color='grey')
        ax.fill_between(x, np.add(y2, sem2), np.subtract(y2, sem2), alpha=FigsConfigs.FILL_ALPHA, color='grey')
    plt.tight_layout()
    plt.legend(loc='best')
    print('{} completed in {:.1f} secs'.format(sys._getframe().f_code.co_name, time.time() - start))
    return fig