def guess(representation, sims, xi, a, a_, b):
sa = sims[xi[a]]
sa_ = sims[xi[a_]]
sb = sims[xi[b]]
    add_sim = -sa + sa_ + sb
if a in representation.wi:
add_sim[representation.wi[a]] = 0
if a_ in representation.wi:
add_sim[representation.wi[a_]] = 0
if b in representation.wi:
add_sim[representation.wi[b]] = 0
b_add = representation.iw[np.nanargmax(add_sim)]
    mul_sim = sa_ * sb * np.reciprocal(sa + 0.01)
if a in representation.wi:
mul_sim[representation.wi[a]] = 0
if a_ in representation.wi:
mul_sim[representation.wi[a_]] = 0
if b in representation.wi:
mul_sim[representation.wi[b]] = 0
b_mul = representation.iw[np.nanargmax(mul_sim)]
return b_add, b_mul
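For context: `guess` scores every vocabulary word with the additive (3CosAdd-style) objective `-sim(a) + sim(a*) + sim(b)` and a multiplicative counterpart, zeroing out the three question words so they cannot be returned as the answer. Below is a minimal self-contained sketch of the additive variant on a toy vocabulary; all names and vectors are hypothetical illustration data, not part of the original project.

import numpy as np

# Toy vocabulary and random unit vectors -- illustration only.
vocab = ["king", "queen", "man", "woman"]
vecs = np.random.randn(len(vocab), 50)
vecs /= np.linalg.norm(vecs, axis=1, keepdims=True)

def sim_to_all(word):
    # Cosine similarity of `word` to every vocabulary entry.
    return vecs @ vecs[vocab.index(word)]

# "man is to king as woman is to ?" via the additive objective:
add_sim = -sim_to_all("man") + sim_to_all("king") + sim_to_all("woman")
for w in ("man", "king", "woman"):  # exclude the question words
    add_sim[vocab.index(w)] = -np.inf
print(vocab[np.nanargmax(add_sim)])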
def test_nanargmax(self):
tgt = np.argmax(self.mat)
for mat in self.integer_arrays():
assert_equal(np.nanargmax(mat), tgt)
def generalized_esd(x, r, alpha=0.05, method='mean'):
"""Generalized ESD test for outliers
(http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h3.htm).
Args:
x (numpy.ndarray): the data
r (int): max number of outliers
alpha (float): the signifiance level
method (str): 'median' or 'mean'
Returns:
list[int]: list of the index of outliers
"""
x = np.asarray(x, dtype=np.float64)
fn = __get_pd_median if method == 'median' else __get_pd_mean
NaN = float('nan')
outliers = []
N = len(x)
    for i in range(1, r + 1):
        if not np.any(~np.isnan(x)):
            break  # nothing left to test
        m, e = fn(x)
        if e == 0.:
            break  # zero spread: no further outliers detectable
        y = np.abs(x - m)
        j = np.nanargmax(y)
        R = y[j]
        lam = __get_lambda_critical(N, i, alpha)
        if R <= lam * e:
            break  # largest deviation is no longer significant
        outliers.append(j)
        x[j] = NaN
return outliers
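A hedged usage sketch: it assumes the module-private helpers `__get_pd_mean`, `__get_pd_median`, and `__get_lambda_critical` referenced above are defined as in the source project, and the data below are made up.

import numpy as np

rng = np.random.default_rng(0)
data = rng.normal(loc=10.0, scale=1.0, size=100)
data[5], data[42] = 25.0, -20.0  # inject two gross outliers

# Ask for at most 5 outliers at the 5% significance level.
print(generalized_esd(data, r=5, alpha=0.05, method='mean'))
# expected to include indices 5 and 42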
evaluate_decision_makers.py (project: motion-classification, author: matthiasplappert)
def _select_best_score(scores, args):
return np.nanargmax(np.array(scores))
def _select_best_measure_index(curr_measures, args):
idx = None
try:
if args.measure == 'aicc':
# The best score for AICc is the minimum.
idx = np.nanargmin(curr_measures)
elif args.measure in ['hmm-distance', 'wasserstein', 'mahalanobis']:
# The best score for the l-d measure is the maximum.
idx = np.nanargmax(curr_measures)
    except ValueError:
        # nanargmin/nanargmax raise on all-NaN input: fall back to a random index.
        idx = random.choice(range(len(curr_measures)))
assert idx is not None
return idx
def choose_arm(x, experts, explore):
n_arms = len(experts)
# make predictions
preds = [expert.predict(x) for expert in experts]
# get best arm
arm_max = np.nanargmax(preds)
# create arm selection probabilities
P = [(1-explore)*(arm==arm_max) + explore/n_arms for arm in range(n_arms)]
# select an arm
chosen_arm = np.random.choice(np.arange(n_arms), p=P)
pred = preds[chosen_arm]
return chosen_arm, pred
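`choose_arm` is an epsilon-greedy selector over expert models: with probability `1 - explore` it exploits the arm whose expert predicts the highest reward, and it spreads `explore` uniformly over all arms. A minimal sketch with stub experts; the `FixedExpert` class is hypothetical.

import numpy as np

class FixedExpert:
    # Stub expert that always predicts the same reward.
    def __init__(self, value):
        self.value = value

    def predict(self, x):
        return self.value

experts = [FixedExpert(0.1), FixedExpert(0.9), FixedExpert(0.5)]
arm, pred = choose_arm(x=None, experts=experts, explore=0.1)
# Arm 1 is selected with probability 0.9 + 0.1/3; the others with 0.1/3 each.
print(arm, pred)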
def predict_ana(model, a, a2, b, realb2):
questWordIndices = [ model.word2id[x] for x in (a,a2,b) ]
# b2 is effectively iterating through the vocab. The row is all the cosine values
b2a2 = model.sim_row(a2)
b2a = model.sim_row(a)
b2b = model.sim_row(b)
addsims = b2a2 - b2a + b2b
addsims[questWordIndices] = -10000
iadd = np.nanargmax(addsims)
b2add = model.vocab[iadd]
# For debugging purposes
ia = model.word2id[a]
ia2 = model.word2id[a2]
ib = model.word2id[b]
ib2 = model.word2id[realb2]
realaddsim = addsims[ib2]
    mulsims = (b2a2 + 1) * (b2b + 1) / (b2a + 1.001)
mulsims[questWordIndices] = -10000
imul = np.nanargmax(mulsims)
b2mul = model.vocab[imul]
return b2add, b2mul
def decode_location(likelihood, pos_centers, time_centers):
"""Finds the decoded location based on the centers of the position bins.
Parameters
----------
likelihood : np.array
With shape(n_timebins, n_positionbins)
pos_centers : np.array
time_centers : np.array
Returns
-------
decoded : nept.Position
Estimate of decoded position.
"""
prob_rows = np.sum(np.isnan(likelihood), axis=1) < likelihood.shape[1]
max_decoded_idx = np.nanargmax(likelihood[prob_rows], axis=1)
prob_decoded = pos_centers[max_decoded_idx]
decoded_pos = np.empty((likelihood.shape[0], pos_centers.shape[1])) * np.nan
decoded_pos[prob_rows] = prob_decoded
decoded_pos = np.squeeze(decoded_pos)
return nept.Position(decoded_pos, time_centers)
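A toy call, hedged: the shapes below are made up and the return value requires the `nept` package.

import numpy as np

likelihood = np.random.rand(5, 4)        # 5 time bins, 4 position bins
likelihood[2] = np.nan                   # one undecodable time bin
pos_centers = np.linspace(0.0, 1.0, 4)[:, np.newaxis]  # shape (4, 1)
time_centers = np.arange(5) * 0.025

decoded = decode_location(likelihood, pos_centers, time_centers)
# The all-NaN time bin decodes to NaN, the rest to the per-row argmax position.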
def maxabs(trace, starttime=None, endtime=None):
"""Returns the maximum of the absolute values of `trace`, and its occurrence time.
In other words, returns the point `(time, value)` where `value = max(abs(trace.data))`
and time (`UTCDateTime`) is the time occurrence of `value`
:param trace: the input obspy.core.Trace
:param starttime: (`obspy.UTCDateTime`) the start time (None or missing defaults to the trace
end): the maximum of the trace `abs` will be searched *from* this time. This argument,
if provided, does not affect the
returned `time` which will be always relative to the trace passed as argument
:param endtime: an obspy UTCDateTime object (or any value
`UTCDateTime` accepts, e.g. integer / `datetime` object) denoting
the end time (None or missing defaults to the trace end): the maximum of the trace `abs`
will be searched *until* this time
:return: the tuple (time, value) where `value = max(abs(trace.data))`, and time is
the value occurrence (`UTCDateTime`)
:return: the tuple `(time_of_max_abs, max_abs)`
"""
original_stime = None if starttime is None else trace.stats.starttime
if starttime is not None or endtime is not None:
# from the docs: "this returns a New Trace object
# Does not copy data but just passes a reference to it"
trace = trace.slice(starttime, endtime)
    if trace.stats.npts < 1:
        return np.nan, np.nan  # empty slice: no maximum to report
idx = np.nanargmax(np.abs(trace.data))
val = trace.data[idx]
tdelta = 0 if original_stime is None else trace.stats.starttime - original_stime
time = timeof(trace, idx) + tdelta
return (time, val)
def optimize_threshold_with_roc(roc, thresholds, criterion='dist'):
if roc.shape[1] > roc.shape[0]:
roc = roc.T
assert(roc.shape[0] == thresholds.shape[0])
    if criterion == 'margin':
        # Youden's J statistic: maximize tpr - fpr.
        scores = roc[:, 1] - roc[:, 0]
    else:
        # Negative distance to the ideal ROC point (fpr, tpr) = (0, 1).
        scores = -cdist(np.array([[0, 1]]), roc)
ti = np.nanargmax(scores)
return thresholds[ti], ti
def optimize_threshold_with_prc(prc, thresholds, criterion='min'):
prc[np.isnan(prc)] = 0
if prc.shape[1] > prc.shape[0]:
prc = prc.T
assert(prc.shape[0] == thresholds.shape[0])
    if criterion == 'sum':
        scores = prc.sum(axis=1)
    elif criterion.startswith('dist'):
        # Negative distance to the ideal PR point (precision, recall) = (1, 1).
        scores = -cdist(np.array([[1, 1]]), prc)
    else:
        scores = prc.min(axis=1)
ti = np.nanargmax(scores)
return thresholds[ti], ti
def optimize_threshold_with_f1(f1c, thresholds, criterion='max'):
#f1c[np.isnan(f1c)] = 0
if criterion == 'max':
ti = np.nanargmax(f1c)
else:
ti = np.nanargmin(np.abs(thresholds-0.5*f1c))
#assert(np.all(thresholds>=0))
#idx = (thresholds>=f1c*0.5-mp) & (thresholds<=f1c*0.5+mp)
#assert(np.any(idx))
#ti = np.where(idx)[0][f1c[idx].argmax()]
return thresholds[ti], ti
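These three helpers rely on `cdist` from `scipy.spatial.distance`, which must be imported. A usage sketch for the ROC variant, pairing it with scikit-learn's `roc_curve` on made-up data:

import numpy as np
from scipy.spatial.distance import cdist
from sklearn.metrics import roc_curve

rng = np.random.default_rng(0)
y_true = rng.integers(0, 2, size=500)
y_score = 0.6 * y_true + 0.4 * rng.random(500)  # noisy but informative scores

fpr, tpr, thresholds = roc_curve(y_true, y_score)
roc = np.column_stack([fpr, tpr])               # one (fpr, tpr) row per threshold
best_thr, ti = optimize_threshold_with_roc(roc, thresholds, criterion='dist')
print(best_thr)  # threshold whose ROC point lies closest to (0, 1)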
def compute_draw_info(self, x, ys):
bs = self.compute_baseline(x, ys)
im = np.nanargmax(ys-bs, axis=1)
lines = (x[im], bs[np.arange(bs.shape[0]), im]), (x[im], ys[np.arange(ys.shape[0]), im])
return [("curve", (x, self.compute_baseline(x, ys), INTEGRATE_DRAW_BASELINE_PENARGS)),
("curve", (x, ys, INTEGRATE_DRAW_BASELINE_PENARGS)),
("line", lines)]
def compute_integral(self, x_s, y_s):
y_s = y_s - self.compute_baseline(x_s, y_s)
if len(x_s) == 0:
return np.zeros((y_s.shape[0],)) * np.nan
# avoid whole nan rows
whole_nan_rows = np.isnan(y_s).all(axis=1)
y_s[whole_nan_rows] = 0
    # select the x-position of the per-row maximum
    pos = x_s[np.nanargmax(y_s, axis=1)]
# set unknown results
pos[whole_nan_rows] = np.nan
return pos
experiment_utils.py (project: SourceFilterContoursMelody, author: juanjobosch)
def get_best_threshold(y_ref, y_pred_score, plot=False):
""" Get threshold on scores that maximizes f1 score.
Parameters
----------
y_ref : array
Reference labels (binary).
y_pred_score : array
Predicted scores.
plot : bool
If true, plot ROC curve
Returns
-------
best_threshold : float
threshold on score that maximized f1 score
max_fscore : float
f1 score achieved at best_threshold
"""
pos_weight = 1.0 - float(len(y_ref[y_ref == 1]))/float(len(y_ref))
neg_weight = 1.0 - float(len(y_ref[y_ref == 0]))/float(len(y_ref))
sample_weight = np.zeros(y_ref.shape)
sample_weight[y_ref == 1] = pos_weight
sample_weight[y_ref == 0] = neg_weight
print "max prediction value = %s" % np.max(y_pred_score)
print "min prediction value = %s" % np.min(y_pred_score)
precision, recall, thresholds = \
metrics.precision_recall_curve(y_ref, y_pred_score, pos_label=1,
sample_weight=sample_weight)
beta = 1.0
btasq = beta**2.0
fbeta_scores = (1.0 + btasq)*(precision*recall)/((btasq*precision)+recall)
max_fscore = fbeta_scores[np.nanargmax(fbeta_scores)]
best_threshold = thresholds[np.nanargmax(fbeta_scores)]
if plot:
plt.figure(1)
plt.subplot(1, 2, 1)
plt.plot(recall, precision, '.b', label='PR curve')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend(loc="lower right", frameon=True)
plt.subplot(1, 2, 2)
plt.plot(thresholds, fbeta_scores[:-1], '.r', label='f1-score')
plt.xlabel('Probability Threshold')
plt.ylabel('F1 score')
plt.show()
plot_data = (recall, precision, thresholds, fbeta_scores[:-1])
return best_threshold, max_fscore, plot_data
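A sketch of a call, hedged: it assumes `numpy as np`, `from sklearn import metrics`, and `matplotlib.pyplot as plt` are in scope, as the snippet implies, and the labels and scores below are synthetic.

import numpy as np

rng = np.random.default_rng(1)
y_ref = rng.integers(0, 2, size=1000)
y_pred_score = np.clip(0.5 * y_ref + rng.normal(0.25, 0.2, size=1000), 0.0, 1.0)

best_threshold, max_fscore, plot_data = get_best_threshold(y_ref, y_pred_score)
print(best_threshold, max_fscore)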
def nanargmin(a, axis=None):
"""
Return the indices of the minimum values in the specified axis ignoring
NaNs. For all-NaN slices ``ValueError`` is raised. Warning: the results
cannot be trusted if a slice contains only NaNs and Infs.
Parameters
----------
a : array_like
Input data.
axis : int, optional
Axis along which to operate. By default flattened input is used.
Returns
-------
index_array : ndarray
An array of indices or a single index value.
See Also
--------
argmin, nanargmax
Examples
--------
>>> a = np.array([[np.nan, 4], [2, 3]])
>>> np.argmin(a)
0
>>> np.nanargmin(a)
2
>>> np.nanargmin(a, axis=0)
array([1, 1])
>>> np.nanargmin(a, axis=1)
array([1, 0])
"""
a, mask = _replace_nan(a, np.inf)
res = np.argmin(a, axis=axis)
if mask is not None:
mask = np.all(mask, axis=axis)
if np.any(mask):
raise ValueError("All-NaN slice encountered")
return res
def nanargmax(a, axis=None):
"""
Return the indices of the maximum values in the specified axis ignoring
NaNs. For all-NaN slices ``ValueError`` is raised. Warning: the
results cannot be trusted if a slice contains only NaNs and -Infs.
Parameters
----------
a : array_like
Input data.
axis : int, optional
Axis along which to operate. By default flattened input is used.
Returns
-------
index_array : ndarray
An array of indices or a single index value.
See Also
--------
argmax, nanargmin
Examples
--------
>>> a = np.array([[np.nan, 4], [2, 3]])
>>> np.argmax(a)
0
>>> np.nanargmax(a)
1
>>> np.nanargmax(a, axis=0)
array([1, 0])
>>> np.nanargmax(a, axis=1)
array([1, 1])
"""
a, mask = _replace_nan(a, -np.inf)
res = np.argmax(a, axis=axis)
if mask is not None:
mask = np.all(mask, axis=axis)
if np.any(mask):
raise ValueError("All-NaN slice encountered")
return res
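A small illustration of the warning above: `nanargmax` masks NaNs by substituting `-inf` before calling `argmax`, so a slice that already contains `-inf` makes the substitute indistinguishable from real data.

import numpy as np

a = np.array([-np.inf, np.nan])
print(np.nanargmax(a))  # 0 -- points at -inf, although no finite maximum exists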
def evaluate_hyperparameters(dataset, iterator, args):
# Select features
if args.features is not None and args.features != dataset.feature_names:
print('selecting features ...')
features = _explode_features(args.features)
start = timeit.default_timer()
dataset = dataset.dataset_from_feature_names(features)
print('done, took %fs' % (timeit.default_timer() - start))
print('')
states = range(3, 22 + 1) # = [3,...,22]
topologies = ['full', 'left-to-right-full', 'left-to-right-1', 'left-to-right-2']
n_combinations = len(states) * len(topologies)
curr_step = 0
combinations = []
measures = []
for state in states:
for topology in topologies:
curr_step += 1
prefix = '%.3d_%d_%s' % (curr_step, state, topology)
print('(%.3d/%.3d) evaluating state=%d and topology=%s ...' % (curr_step, n_combinations, state, topology))
start = timeit.default_timer()
try:
# Configure args from which the HMMs are created
args.n_states = state
args.topology = topology
ll_stats = _compute_averaged_pos_and_neg_lls(dataset, iterator, prefix, args)
measure = _compute_measure(ll_stats, dataset, args)
            except Exception:
                measure = np.nan
            if np.isnan(measure):
                print('measure: not computable')
else:
print('measure: %f' % measure)
combinations.append((str(state), topology))
measures.append(measure)
print('done, took %fs' % (timeit.default_timer() - start))
print('')
best_idx = np.nanargmax(np.array(measures)) # get the argmax ignoring NaNs
print('best combination with score %f: %s' % (measures[best_idx], ', '.join(combinations[best_idx])))
print('detailed reports have been saved')
# Save results
assert len(combinations) == len(measures)
if args.output_dir is not None:
filename = '_results.csv'
        with open(os.path.join(args.output_dir, filename), 'w', newline='') as f:
writer = csv.writer(f, delimiter=';')
writer.writerow(['', 'idx', 'measure', 'combination'])
for idx, (measure, combination) in enumerate(zip(measures, combinations)):
selected = '*' if best_idx == idx else ''
writer.writerow([selected, '%d' % idx, '%f' % measure, ', '.join(combination)])
def evaluate_pca(dataset, iterator, args):
# Select features
if args.features is not None and args.features != dataset.feature_names:
print('selecting features ...')
features = _explode_features(args.features)
start = timeit.default_timer()
dataset = dataset.dataset_from_feature_names(features)
print('done, took %fs' % (timeit.default_timer() - start))
print('')
pca_components = range(1, dataset.n_features)
total_steps = len(pca_components)
if 'pca' not in args.transformers:
args.transformers.append('pca')
curr_step = 0
measures = []
for n_components in pca_components:
curr_step += 1
prefix = '%.3d' % curr_step
print('(%.3d/%.3d) evaluating with %d pca components ...' % (curr_step, total_steps, n_components))
start = timeit.default_timer()
try:
args.pca_components = n_components
ll_stats = _compute_averaged_pos_and_neg_lls(dataset, iterator, prefix, args)
measure = _compute_measure(ll_stats, dataset, args)
        except Exception:
            measure = np.nan
        if np.isnan(measure):
            print('measure: not computable')
else:
print('measure: %f' % measure)
# Correct score. The problem is that it is computed given the dataset, which has too many features.
measure = (measure * float(dataset.n_features)) / float(n_components)
measures.append(measure)
print('done, took %fs' % (timeit.default_timer() - start))
print('')
assert len(pca_components) == len(measures)
best_idx = np.nanargmax(np.array(measures)) # get the argmax ignoring NaNs
print('best result with score %f: %d PCA components' % (measures[best_idx], pca_components[best_idx]))
print('detailed reports have been saved')
# Save results
if args.output_dir is not None:
filename = '_results.csv'
        with open(os.path.join(args.output_dir, filename), 'w', newline='') as f:
writer = csv.writer(f, delimiter=';')
writer.writerow(['', 'idx', 'measure', 'components'])
for idx, (measure, n_components) in enumerate(zip(measures, pca_components)):
selected = '*' if best_idx == idx else ''
writer.writerow([selected, '%d' % idx, '%f' % measure, '%d' % n_components])
def evaluate_fhmms(dataset, iterator, args):
# Select features
if args.features is not None and args.features != dataset.feature_names:
print('selecting features ...')
features = _explode_features(args.features)
start = timeit.default_timer()
dataset = dataset.dataset_from_feature_names(features)
print('done, took %fs' % (timeit.default_timer() - start))
print('')
chains = [1, 2, 3, 4]
total_steps = len(chains)
curr_step = 0
measures = []
for chain in chains:
curr_step += 1
prefix = '%.3d_%d-chains' % (curr_step, chain)
print('(%.3d/%.3d) evaluating n_chains=%d ...' % (curr_step, total_steps, chain))
start = timeit.default_timer()
old_loglikelihood_method = args.loglikelihood_method
try:
# Configure args from which the HMMs are created
args.n_chains = chain
if chain == 1:
args.model = 'hmm'
args.loglikelihood_method = 'exact' # there's no approx loglikelihood method for HMMs
else:
args.model = 'fhmm-seq'
ll_stats = _compute_averaged_pos_and_neg_lls(dataset, iterator, prefix, args, save_model=True, compute_distances=False)
measure = _compute_measure(ll_stats, dataset, args)
        except Exception:
            measure = np.nan
        args.loglikelihood_method = old_loglikelihood_method
        if np.isnan(measure):
print('measure: not computable')
else:
print('measure: %f' % measure)
measures.append(measure)
print('done, took %fs' % (timeit.default_timer() - start))
print('')
best_idx = np.nanargmax(np.array(measures)) # get the argmax ignoring NaNs
print('best model with score %f: %d chains' % (measures[best_idx], chains[best_idx]))
print('detailed reports have been saved')
# Save results
assert len(chains) == len(measures)
if args.output_dir is not None:
filename = '_results.csv'
        with open(os.path.join(args.output_dir, filename), 'w', newline='') as f:
writer = csv.writer(f, delimiter=';')
writer.writerow(['', 'idx', 'measure', 'chains'])
for idx, (measure, chain) in enumerate(zip(measures, chains)):
selected = '*' if best_idx == idx else ''
writer.writerow([selected, '%d' % idx, '%f' % measure, '%d' % chain])