import tensorflow as tf


def find_dup(a):
    """Find the duplicated elements in a 1-D tensor.

    Args:
        a: 1-D tensor.

    Returns:
        more_than_one_vals: the values that appear more than once in `a`.
        indexes_in_a: the indices in `a` of every duplicated occurrence.
        dups_in_a: the duplicated values as they appear in `a`.
    """
    unique_a_vals, unique_idx = tf.unique(a)
    # Count how many times each unique value occurs in `a`.
    count_a_unique = tf.unsorted_segment_sum(tf.ones_like(a),
                                             unique_idx,
                                             tf.shape(a)[0])
    more_than_one = tf.greater(count_a_unique, 1)
    more_than_one_idx = tf.squeeze(tf.where(more_than_one))
    more_than_one_vals = tf.squeeze(tf.gather(unique_a_vals, more_than_one_idx))
    # Split `a` into the non-duplicated elements, then everything else (the duplicates).
    not_duplicated, _ = tf.setdiff1d(a, more_than_one_vals)
    dups_in_a, indexes_in_a = tf.setdiff1d(a, not_duplicated)
    return more_than_one_vals, indexes_in_a, dups_in_a
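A minimal usage sketch for `find_dup`, reusing the `tf` import above and assuming TF 1.x graph mode; the sample tensor is made up for illustration:

a = tf.constant([3, 1, 5, 3, 5, 5, 2])              # 3 and 5 each occur more than once
vals, idxs, dups = find_dup(a)
with tf.Session() as sess:
    print(sess.run([vals, idxs, dups]))
    # vals -> [3, 5], idxs -> [0, 2, 3, 4, 5], dups -> [3, 5, 3, 5, 5]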
Python unique() example source code
def predict(self, answer, start_logits, end_logits, mask) -> Prediction:
    masked_start_logits = exp_mask(start_logits, mask)
    masked_end_logits = exp_mask(end_logits, mask)

    if len(answer) == 3:
        group_ids = answer[2]
        # Turn the group ids into segment ids using tf.unique
        _, group_segments = tf.unique(group_ids, out_idx=tf.int32)

        losses = []
        for answer_mask, logits in zip(answer, [masked_start_logits, masked_end_logits]):
            group_norms = segment_logsumexp(logits, group_segments)
            if self.aggregate == "sum":
                log_score = segment_logsumexp(
                    logits + VERY_NEGATIVE_NUMBER * (1 - tf.cast(answer_mask, tf.float32)),
                    group_segments)
            else:
                raise ValueError()
            losses.append(tf.reduce_mean(-(log_score - group_norms)))
        loss = tf.add_n(losses)
    else:
        raise NotImplementedError()

    tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
    return BoundaryPrediction(tf.nn.softmax(masked_start_logits),
                              tf.nn.softmax(masked_end_logits),
                              masked_start_logits, masked_end_logits, mask)
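The `tf.unique(..., out_idx=tf.int32)` call above is what maps arbitrary group ids onto contiguous segment ids that segment-style ops can consume; a minimal standalone sketch of that step, with made-up ids:

import tensorflow as tf

group_ids = tf.constant([17, 3, 17, 42, 3])              # arbitrary, non-contiguous group ids
_, group_segments = tf.unique(group_ids, out_idx=tf.int32)
# group_segments == [0, 1, 0, 2, 1]: rows sharing a group id now share a segment id.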
def unique(x, return_index=False):
    """
    Find the unique elements of a tensor.

    Returns the unique elements of `x` in order of first appearance. If
    `return_index` is True, also returns, for each element of `x`, the index
    of that element in the array of unique values (i.e. the mapping that
    reconstructs the input from the unique elements).
    """
    y, indices = tensorflow.unique(x)
    if return_index:
        return y, indices
    else:
        return y
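A quick usage sketch for the wrapper above, assuming `import tensorflow` and a TF 1.x session; the input values are made up:

import tensorflow

x = tensorflow.constant([9, 3, 5, 7, 3, 9, 9])
y, indices = unique(x, return_index=True)
with tensorflow.Session() as sess:
    print(sess.run(y))        # [9 3 5 7]      (order of first appearance, not sorted)
    print(sess.run(indices))  # [0 1 2 3 1 0 0] (position of each input element in y)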
def embedding_lookup_sparse_sumexp(params, sp_ids,
                                   name=None):
    # Segment each embedding row by the example (first sparse dimension) it belongs to.
    segment_ids = sp_ids.indices[:, 0]
    if segment_ids.dtype != tf.int32:
        segment_ids = tf.cast(segment_ids, tf.int32)

    # Look up each unique id once, then sum exp(embedding) per segment.
    ids = sp_ids.values
    ids, idx = tf.unique(ids)
    embeddings = tf.nn.embedding_lookup(params, ids)
    embeddings = tf.exp(embeddings)
    embeddings = tf.sparse_segment_sum(embeddings, idx, segment_ids,
                                       name=name)
    return embeddings
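A minimal sketch of calling `embedding_lookup_sparse_sumexp`; the parameter matrix and the sparse id tensor below are invented for illustration:

import tensorflow as tf

params = tf.random_normal([10, 4])                        # 10 embeddings of width 4 (made up)
sp_ids = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                         values=[2, 7, 2],
                         dense_shape=[2, 3])
summed = embedding_lookup_sparse_sumexp(params, sp_ids)
# Row 0 of `summed` is exp(params[2]) + exp(params[7]); row 1 is exp(params[2]).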
def _apply_sparse(self, cache):
    """"""

    x_tm1, g_t, idxs = cache['x_tm1'], cache['g_t'], cache['idxs']
    # Collapse duplicate indices so each row gets a single, summed gradient.
    idxs, idxs_ = tf.unique(idxs)
    g_t_ = tf.unsorted_segment_sum(g_t, idxs_, tf.size(idxs))
    updates = cache['updates']

    if self.mu > 0:
        m_t, t_m = self._sparse_moving_average(x_tm1, idxs, g_t_, 'm', beta=self.mu)
        m_t_ = tf.gather(m_t, idxs)
        m_bar_t_ = (1-self.gamma) * m_t_ + self.gamma * g_t_
        updates.extend([m_t, t_m])
    else:
        m_bar_t_ = g_t_

    if self.nu > 0:
        v_t, t_v = self._sparse_moving_average(x_tm1, idxs, g_t_**2, 'v', beta=self.nu)
        v_t_ = tf.gather(v_t, idxs)
        v_bar_t_ = tf.sqrt(v_t_ + self.epsilon)
        updates.extend([v_t, t_v])
    else:
        v_bar_t_ = 1

    s_t_ = self.learning_rate * m_bar_t_ / v_bar_t_
    cache['s_t'] = s_t_
    cache['g_t'] = g_t_
    cache['idxs'] = idxs
    return cache
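The `tf.unique` / `tf.unsorted_segment_sum` pair at the top of `_apply_sparse` collapses repeated indices in a sparse gradient so each row is updated exactly once; a standalone sketch of that pattern, with made-up values:

import tensorflow as tf

idxs = tf.constant([4, 1, 4, 7])                   # rows touched by the gradient; index 4 repeats
g_t = tf.constant([[1.], [2.], [3.], [4.]])        # one gradient row per entry in idxs
uniq, pos = tf.unique(idxs)                        # uniq == [4, 1, 7], pos == [0, 1, 0, 2]
g_summed = tf.unsorted_segment_sum(g_t, pos, tf.size(uniq))
# g_summed == [[4.], [2.], [4.]]: the two rows for index 4 were summed together.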
def _apply_sparse(self, cache):
    """"""

    g_t, idxs = cache['g_t'], cache['idxs']
    idxs, idxs_ = tf.unique(idxs)
    g_t_ = tf.unsorted_segment_sum(g_t, idxs_, tf.size(idxs))
    cache['g_t'] = g_t_
    cache['idxs'] = idxs
    cache['s_t'] = self.learning_rate * g_t_
    return cache
def query_once(self) -> bool:
    """
    Should the embedder be queried once per unique word in the input, or once per word?
    Intended to support placeholders, although I ended up not experimenting much with that route.
    """
    return False
def shrink_embed(mat, word_ixs: List):
    """
    Build an embedding matrix that contains only the elements in `word_ixs`,
    and map `word_ixs` to tensors that index into the new embedding matrix.
    Useful if you want to apply dropout to the embeddings without dropping out the entire matrix.
    """
    all_words, out_id = tf.unique(tf.concat([tf.reshape(x, (-1,)) for x in word_ixs], axis=0))
    mat = tf.gather(mat, all_words)
    partitions = tf.split(out_id, [tf.reduce_prod(tf.shape(x)) for x in word_ixs])
    partitions = [tf.reshape(x, tf.shape(o)) for x, o in zip(partitions, word_ixs)]
    return mat, partitions
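A hedged sketch of how `shrink_embed` might be used; the embedding matrix and the two id tensors below are dummy values, not from the original project:

import tensorflow as tf

mat = tf.random_normal([5000, 100])                 # full embedding matrix (made-up size)
context_ids = tf.constant([[12, 7], [7, 431]])      # word ids for two hypothetical inputs
question_ids = tf.constant([[431, 12, 12]])
small_mat, (context_ix, question_ix) = shrink_embed(mat, [context_ids, question_ids])
# small_mat keeps only the rows for ids {12, 7, 431}; context_ix and question_ix
# index into small_mat and retain the shapes of the original id tensors.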
def _apply_sparse(self, cache):
    """"""

    x_tm1, g_t, idxs = cache['x_tm1'], cache['g_t'], cache['idxs']
    idxs, idxs_ = tf.unique(idxs)
    g_t_ = tf.unsorted_segment_sum(g_t, idxs_, tf.size(idxs))
    updates = cache['updates']

    if self.mu > 0:
        m_t, t_m = self._sparse_moving_average(x_tm1, idxs, g_t_, 'm', beta=self.mu)
        m_t_ = tf.gather(m_t, idxs)
        m_bar_t_ = (1-self.gamma) * m_t_ + self.gamma * g_t_
        updates.extend([m_t, t_m])
    else:
        m_bar_t_ = g_t_

    if self.nu > 0:
        v_t, t_v = self._sparse_moving_average(x_tm1, idxs, g_t_**2, 'v', beta=self.nu)
        v_t_ = tf.gather(v_t, idxs)
        v_bar_t_ = tf.sqrt(v_t_ + self.epsilon)
        updates.extend([v_t, t_v])
    else:
        v_bar_t_ = 1

    s_t_ = self.learning_rate * m_bar_t_ / v_bar_t_
    # Unlike the variant above, zero out any non-finite update steps.
    cache['s_t'] = tf.where(tf.is_finite(s_t_), s_t_, tf.zeros_like(s_t_))
    cache['g_t'] = g_t_
    cache['idxs'] = idxs
    return cache
def test_Unique(self):
    t = tf.unique([9, 3, 5, 7, 3, 9, 9], out_idx=tf.int32)
    self.check(t)