def in_top_k(predictions, targets, k):
'''Returns whether the `targets` are in the top `k` `predictions`
# Arguments
        predictions: A tensor of shape batch_size x classes and type float32.
targets: A tensor of shape batch_size and type int32 or int64.
k: An int, number of top elements to consider.
# Returns
A tensor of shape batch_size and type int. output_i is 1 if
targets_i is within top-k values of predictions_i
'''
predictions_top_k = T.argsort(predictions)[:, -k:]
    result, _ = theano.map(
        lambda prediction, target: any(equal(prediction, target)),
        sequences=[predictions_top_k, targets])
return result
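# Hedged usage sketch (not from the original source): `any` and `equal` above are taken
# to be the surrounding backend's element-wise wrappers around T.any and T.eq. The same
# check can be expressed standalone like this (hypothetical shapes and values):
import numpy as np
import theano
import theano.tensor as T

preds = T.fmatrix('preds')          # (batch_size, classes)
tgts = T.ivector('tgts')            # (batch_size,)
top3 = T.argsort(preds)[:, -3:]     # indices of the 3 highest-scoring classes per row
hits, _ = theano.map(lambda p, t: T.any(T.eq(p, t)), sequences=[top3, tgts])
in_top_3 = theano.function([preds, tgts], hits, allow_input_downcast=True)
# in_top_3(np.random.rand(4, 10), np.array([1, 2, 3, 4])) -> one 0/1 flag per row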
def gradient_descent(self, loss):
"""Momentum GD with gradient clipping."""
grad = T.grad(loss, self.params)
self.momentum_velocity_ = [0.] * len(grad)
grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grad)))
updates = OrderedDict()
not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
scaling_den = T.maximum(5.0, grad_norm)
    for n, (param, g) in enumerate(zip(self.params, grad)):
        # Zero out non-finite gradients; otherwise rescale so the global norm is at most 5.
        g = T.switch(not_finite, 0.1 * param,
                     g * (5.0 / scaling_den))
        velocity = self.momentum_velocity_[n]
        update_step = self.momentum * velocity - self.learning_rate * g
        self.momentum_velocity_[n] = update_step
        updates[param] = param + update_step
return updates
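# Hedged usage note (assumed context, not from the original project): the OrderedDict
# returned above is meant to be passed to theano.function as its `updates` argument,
# e.g. train_fn = theano.function([x, y], loss, updates=self.gradient_descent(loss)).
# Because self.momentum_velocity_ holds plain Python values rather than shared variables,
# the velocity seen by the compiled function is the initial zero for every parameter.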
def generate(self, relative_position, cur_chord_root, cur_chord_type, **kwargs):
"""
Generate a chord input for a given timestep.
Parameters:
relative_position: A theano tensor (int32) of shape (n_parallel), giving the
current relative position for this timestep
cur_chord_root: A theano tensor (int32) of shape (n_parallel) giving the unshifted chord root
cur_chord_type: A theano tensor (int32) of shape (n_parallel, CHORD_WIDTH), giving the unshifted chord
type representation, parsed from the leadsheet
Returns:
piece: A theano tensor (float32) of shape (n_parallel, PART_WIDTH)
"""
def _map_fn(pos, chord):
# Now pos is scalar and chord is of shape (CHORD_WIDTH), so we can roll
return T.roll(chord, (-pos)%12, 0)
shifted_chords, _ = theano.map(_map_fn, sequences=[relative_position-cur_chord_root, cur_chord_type])
# shifted_chords = theano.printing.Print("ChordShiftInputPart")(shifted_chords)
# shifted_chords = T.opt.Assert()(shifted_chords, T.eq(shifted_chords.shape[1], self.PART_WIDTH))
return shifted_chords
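# Minimal sketch of the per-row roll pattern used in _map_fn above (hypothetical shapes
# and values, not from the original project): theano.map applies T.roll row by row with
# a different shift per row, exactly as generate() does with relative_position and the
# chord type matrix.
import numpy as np
import theano
import theano.tensor as T

shifts = T.ivector('shifts')   # one rotation per row
rows = T.imatrix('rows')       # (n_rows, 12)
rolled, _ = theano.map(lambda s, r: T.roll(r, (-s) % 12, 0), sequences=[shifts, rows])
roll_fn = theano.function([shifts, rows], rolled, allow_input_downcast=True)
# roll_fn([1, 2], np.tile(np.arange(12, dtype='int32'), (2, 1))) rotates each row by a
# different amount.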
def test_map(self):
v = theano.tensor.vector('v')
abs_expr, abs_updates = theano.map(
lambda x: abs(x),
v,
[],
truncate_gradient=-1,
go_backwards=False)
f = theano.function([v],
abs_expr,
updates=abs_updates,
allow_input_downcast=True)
rng = numpy.random.RandomState(utt.fetch_seed())
vals = rng.uniform(size=(10,), low=-5., high=5.)
abs_vals = abs(vals)
theano_vals = f(vals)
utt.assert_allclose(abs_vals, theano_vals)
def batch_sim5(w, M, eps=1e-6):
"""
w: matrix with shape (batch, memory_elem)
M: tensor with shape (batch, memory_size, memory_elem)
eps: numerical stability parameter
"""
M = M[0] # (memory_size, memory_elem)
def batch_cos_sim(m, w, eps=eps):
"""
        Computes the cosine similarity between one memory slot and every row of w.
        m: vector with shape (memory_elem,)
        w: matrix with shape (batch, memory_elem)
        returns: vector with shape (batch,)
"""
sim = T.dot(m,w.T) / T.sqrt((m*m).sum() * (w*w).sum(1) + eps)
return sim #(batch,)
sim, _ = theano.map(fn=batch_cos_sim, sequences=[M], non_sequences=[w])
sim = sim.dimshuffle(1,0) # (batch, memory_size)
return sim
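# Hedged numeric check of the pattern above (hypothetical data, not from the original
# project): mapping over memory slots yields a (memory_size, batch) matrix of cosine
# similarities, which is then transposed to (batch, memory_size).
import numpy as np
import theano
import theano.tensor as T

w_var = T.matrix('w')    # (batch, memory_elem)
M_var = T.tensor3('M')   # (batch, memory_size, memory_elem)
sim_fn = theano.function([w_var, M_var], batch_sim5(w_var, M_var),
                         allow_input_downcast=True)
w_np = np.random.rand(2, 4)
M_np = np.tile(np.random.rand(3, 4), (2, 1, 1))   # same memory for every batch entry
# sim_fn(w_np, M_np) has shape (2, 3): one similarity per (batch row, memory slot).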
def batch_sim6(w, M, eps=1e-6):
"""
w: matrix with shape (batch, memory_elem)
M: tensor with shape (batch, memory_size, memory_elem)
eps: numerical stability parameter
"""
M = M[0] #only one true memory
#M = M.dimshuffle(1,0) # (memory_elem, memory_size)
def norm(A):
"""
Calculate the column norm of matrix A
A: matrix with shape (N, M)
return: vector with shape (N,)
"""
n, _ = theano.map(fn=lambda a: T.sqrt((a*a).sum()), sequences=[A])
return n
    norms = T.outer(norm(w), norm(M)) #(batch, memory_size)
    batch_sim = T.dot(w, M.T) / (norms + eps) #(batch, memory_size)
return batch_sim
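# Design note (reading of the code above, hedged): batch_sim6 computes the same
# (batch, memory_size) cosine-similarity matrix as batch_sim5, but vectorises it as a
# single T.dot of w against M.T divided by the outer product of the row norms, leaving
# theano.map only for the per-row norm computation.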
def map_fn(fn, elems, name=None):
'''Map the function fn over the elements elems and return the outputs.
# Arguments
fn: Callable that will be called upon each element in elems
elems: tensor, at least 2 dimensional
name: A string name for the map node in the graph
# Returns
Tensor with first dimension equal to the elems and second depending on
fn
'''
return theano.map(fn, elems, name=name)[0]
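# Hedged usage sketch (hypothetical values, not from the original source): map_fn applies
# fn along the first dimension of elems, so summing each row of a matrix looks like this.
import numpy as np
import theano
import theano.tensor as T

m = T.matrix('m')
row_sums = map_fn(lambda row: row.sum(), m)
sum_rows = theano.function([m], row_sums, allow_input_downcast=True)
# sum_rows(np.arange(6.).reshape(3, 2)) -> array([1., 5., 9.])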
def sym_histograms(self, X):
"""
Encodes a set of objects (X is a tensor3)
:param X: tensor3 containing the feature vectors for each object
:return:
"""
histograms, updates = theano.map(self.sym_histogram, X)
return histograms
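# Note (reading of the code, hedged): theano.map applies self.sym_histogram to each
# object in X independently, producing one histogram per object in the set.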
def create_recursive_unit(self):
self.W_z = theano.shared(self.init_matrix([self.hidden_dim, self.emb_dim]))
self.U_z = theano.shared(self.init_matrix(
[self.degree, self.hidden_dim, self.hidden_dim]))
self.W_r = theano.shared(self.init_matrix([self.hidden_dim, self.emb_dim]))
self.U_r = theano.shared(self.init_matrix(
[self.degree, self.hidden_dim, self.hidden_dim]))
self.W_h = theano.shared(self.init_matrix([self.hidden_dim, self.emb_dim]))
self.U_h = theano.shared(self.init_matrix([self.hidden_dim, self.hidden_dim]))
self.params.extend([
self.W_z, self.U_z,
self.W_r, self.U_r,
self.W_h, self.U_h])
def unit(parent_x, child_h, child_exists):
(pre_z, pre_r), _ = theano.map(
fn=lambda Uz, Ur, h: (T.dot(Uz, h), T.dot(Ur, h)),
sequences=[self.U_z, self.U_r, child_h])
z = _softmax(
T.dot(self.W_z, parent_x).dimshuffle('x', 0) + pre_z,
child_exists, add_one=True)
r = _softmax(
T.dot(self.W_r, parent_x).dimshuffle('x', 0) + pre_r,
child_exists, add_one=False)
h_hat = T.tanh(T.dot(self.W_h, parent_x) +
T.dot(self.U_h, T.sum(r * child_h, axis=0)))
h = (1 - T.sum(z, axis=0)) * h_hat + T.sum(z * child_h, axis=0)
return h
return unit
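# Note on the unit above (reading of the code, hedged): theano.map iterates over the
# per-child weight slices U_z[c], U_r[c] together with each child's hidden state, so
# every child position gets its own update/reset projection before the softmax-style
# gating combines the children with the candidate state h_hat.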
def compute_tree(self, emb_x, tree):
self.recursive_unit = self.create_recursive_unit()
self.leaf_unit = self.create_leaf_unit()
num_nodes = tree.shape[0] # num internal nodes
num_leaves = self.num_words - num_nodes
# compute leaf hidden states
leaf_h, _ = theano.map(
fn=self.leaf_unit,
sequences=[emb_x[:num_leaves]])
if self.irregular_tree:
init_node_h = T.concatenate([leaf_h, leaf_h], axis=0)
else:
init_node_h = leaf_h
# use recurrence to compute internal node hidden states
def _recurrence(cur_emb, node_info, t, node_h, last_h):
child_exists = node_info > -1
offset = num_leaves * int(self.irregular_tree) - child_exists * t
child_h = node_h[node_info + offset] * child_exists.dimshuffle(0, 'x')
parent_h = self.recursive_unit(cur_emb, child_h, child_exists)
node_h = T.concatenate([node_h,
parent_h.reshape([1, self.hidden_dim])])
return node_h[1:], parent_h
dummy = theano.shared(self.init_vector([self.hidden_dim]))
(_, parent_h), _ = theano.scan(
fn=_recurrence,
outputs_info=[init_node_h, dummy],
sequences=[emb_x[num_leaves:], tree, T.arange(num_nodes)],
n_steps=num_nodes)
return T.concatenate([leaf_h, parent_h], axis=0)
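# Note on _recurrence above (reading of the code, hedged): node_h is kept at a fixed
# length by concatenating the new parent state and returning node_h[1:], so at step t the
# indices in node_info (plus the computed offset) still address the correct child states
# while the earliest entries scroll out of the window.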
def compute_tree(self, emb_x, tree):
self.recursive_unit = self.create_recursive_unit()
self.leaf_unit = self.create_leaf_unit()
num_nodes = tree.shape[0] # num internal nodes
num_leaves = self.num_words - num_nodes
# compute leaf hidden states
(leaf_h, leaf_c), _ = theano.map(
fn=self.leaf_unit,
sequences=[emb_x[:num_leaves]])
if self.irregular_tree:
init_node_h = T.concatenate([leaf_h, leaf_h], axis=0)
init_node_c = T.concatenate([leaf_c, leaf_c], axis=0)
else:
init_node_h = leaf_h
init_node_c = leaf_c
# use recurrence to compute internal node hidden states
def _recurrence(cur_emb, node_info, t, node_h, node_c, last_h):
child_exists = node_info > -1
offset = num_leaves * int(self.irregular_tree) - child_exists * t
child_h = node_h[node_info + offset] * child_exists.dimshuffle(0, 'x')
child_c = node_c[node_info + offset] * child_exists.dimshuffle(0, 'x')
parent_h, parent_c = self.recursive_unit(cur_emb, child_h, child_c, child_exists)
node_h = T.concatenate([node_h,
parent_h.reshape([1, self.hidden_dim])])
node_c = T.concatenate([node_c,
parent_c.reshape([1, self.hidden_dim])])
return node_h[1:], node_c[1:], parent_h
dummy = theano.shared(self.init_vector([self.hidden_dim]))
(_, _, parent_h), _ = theano.scan(
fn=_recurrence,
outputs_info=[init_node_h, init_node_c, dummy],
sequences=[emb_x[num_leaves:], tree, T.arange(num_nodes)],
n_steps=num_nodes)
return T.concatenate([leaf_h, parent_h], axis=0)
def decode_to_probs(self, activations, relative_position, low_bound, high_bound):
squashed = T.reshape(activations, (-1,self.RAW_ENCODING_WIDTH))
n_parallel = squashed.shape[0]
probs = T.nnet.softmax(squashed)
def _scan_fn(cprobs, cpos):
if self.with_artic:
abs_probs = cprobs[:2]
rel_probs = cprobs[2:]
else:
rel_probs = cprobs
abs_probs = T.ones((2,))
aligned = T.roll(rel_probs, (cpos-low_bound)%12)
num_tile = int(math.ceil((high_bound-low_bound)/self.WINDOW_SIZE))
tiled = T.tile(aligned, (num_tile,))[:(high_bound-low_bound)]
full = T.concatenate([abs_probs, tiled], 0)
return full
# probs = theano.printing.Print("probs",['shape'])(probs)
# relative_position = theano.printing.Print("relative_position",['shape'])(relative_position)
from_scan, _ = theano.map(fn=_scan_fn, sequences=[probs, T.flatten(relative_position)])
# from_scan = theano.printing.Print("from_scan",['shape'])(from_scan)
newshape = T.concatenate([activations.shape[:-1],[2+high_bound-low_bound]],0)
fixed = T.reshape(from_scan, newshape, ndim=activations.ndim)
return fixed
def map_fn(fn, elems, name=None, dtype=None):
"""Map the function fn over the elements elems and return the outputs.
# Arguments
fn: Callable that will be called upon each element in elems
elems: tensor, at least 2 dimensional
        name: A string name for the map node in the graph
        dtype: Not used by this Theano implementation
# Returns
Tensor with first dimension equal to the elems and second depending on
fn
"""
return theano.map(fn, elems, name=name)[0]
def batch_sim(w, M, eps=1e-6):
"""
w: matrix with shape (batch, memory_elem)
M: tensor with shape (batch, memory_size, memory_elem)
eps: numerical stability parameter
"""
M = M.dimshuffle(1,0,2) # (N, batch, M)
def cos_sim(u, v, eps=eps):
"""
Takes two vectors and calculates the scalar cosine similarity.
u: vector with shape (memory_elem,)
v: vector with shape (memory_elem,)
returns: scalar
"""
sim = T.dot(u,v) / T.sqrt((u*u).sum() * (v*v).sum() + eps)
return sim
def batch_cos_sim(m_i, w):
"""
        Takes two matrices and calculates the cosine similarity of their
        corresponding rows.
m_i: matrix with shape (batch, memory_elem)
w: matrix with shape (batch, memory_elem)
returns: vector with shape (batch,)
"""
sim, _ = theano.map(fn=cos_sim, sequences=[w, m_i])
return sim
sim, _ = theano.map(fn=batch_cos_sim, sequences=[M], non_sequences=[w])
sim = sim.dimshuffle(1,0) # (batch, memory_size)
return sim
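# Design note (reading of the code above, hedged): batch_sim nests theano.map so that
# cos_sim is evaluated once per (memory slot, batch row) pair; it yields the same kind of
# (batch, memory_size) similarity matrix as batch_sim5/batch_sim6, but with scalar-level
# scan steps, which is typically much slower than the vectorised T.dot formulation.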
def call(self, inputs, mask=None):
def f(i, embedding, text_input):
mask = T.neq(text_input[i], 0).astype(FLOATX)
vec = T.dot(mask, embedding[i])
vec /= T.maximum(vec.norm(2, 0), K.epsilon())
return T.dot(vec, self.W) + self.b
return theano.map(f, T.arange(inputs[0].shape[0]), non_sequences=inputs)[0]
def call(self, inputs, mask=None):
l1 = inputs[0]
l2 = inputs[1]
def f(i, l1, l2):
return T.clip(T.batched_tensordot(l1[i], l2[i], 1), FLOAT_MIN, FLOAT_MAX).astype(FLOATX)
return theano.map(f, T.arange(l1.shape[0]), non_sequences=[l1, l2])[0]
def create_recursive_unit(self):
self.W_i = theano.shared(self.init_matrix([self.hidden_dim, self.emb_dim]))
self.U_i = theano.shared(self.init_matrix(
[self.degree, self.hidden_dim, self.hidden_dim]))
self.b_i = theano.shared(self.init_vector([self.hidden_dim]))
self.W_f = theano.shared(self.init_matrix([self.hidden_dim, self.emb_dim]))
self.U_f = theano.shared(self.init_matrix(
[self.degree, self.degree, self.hidden_dim, self.hidden_dim]))
self.b_f = theano.shared(self.init_vector([self.hidden_dim]))
self.W_o = theano.shared(self.init_matrix([self.hidden_dim, self.emb_dim]))
self.U_o = theano.shared(self.init_matrix(
[self.degree, self.hidden_dim, self.hidden_dim]))
self.b_o = theano.shared(self.init_vector([self.hidden_dim]))
self.W_u = theano.shared(self.init_matrix([self.hidden_dim, self.emb_dim]))
self.U_u = theano.shared(self.init_matrix(
[self.degree, self.hidden_dim, self.hidden_dim]))
self.b_u = theano.shared(self.init_vector([self.hidden_dim]))
self.params.extend([
self.W_i, self.U_i, self.b_i,
self.W_f, self.U_f, self.b_f,
self.W_o, self.U_o, self.b_o,
self.W_u, self.U_u, self.b_u])
def unit(parent_x, child_h, child_c, child_exists):
(h_i, h_o, h_u), _ = theano.map(
fn=lambda Ui, Uo, Uu, h, exists:
(exists * T.dot(Ui, h), exists * T.dot(Uo, h), exists * T.dot(Uu, h)),
sequences=[self.U_i, self.U_o, self.U_u, child_h, child_exists])
i = T.nnet.sigmoid(T.dot(self.W_i, parent_x) + h_i.sum(axis=0) + self.b_i)
o = T.nnet.sigmoid(T.dot(self.W_o, parent_x) + h_o.sum(axis=0) + self.b_o)
u = T.tanh(T.dot(self.W_u, parent_x) + h_u.sum(axis=0) + self.b_u)
def _sub_f(U):
sub_h_f, _ = theano.map(
fn=lambda sub_U, h, exists: exists * T.dot(sub_U, h),
sequences=[U, child_h, child_exists])
return sub_h_f.sum(axis=0)
h_f, _ = theano.map(
fn=lambda U: _sub_f(U),
sequences=[self.U_f])
f = (T.nnet.sigmoid(
T.dot(self.W_f, parent_x).dimshuffle('x', 0) + h_f +
self.b_f.dimshuffle('x', 0)) *
child_exists.dimshuffle(0, 'x'))
c = i * u + T.sum(f * child_c, axis=0)
h = o * T.tanh(c)
return h, c
return unit
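# Note on the unit above (reading of the code, hedged): U_f holds one
# (hidden_dim, hidden_dim) matrix per pair of child positions, so the nested theano.map
# lets each child's forget gate aggregate contributions from every child's hidden state,
# while the input, output and candidate projections use a single matrix per child.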