def conv_step(nodes, children, feature_size, w_t, w_r, w_l, b_conv):
    """Convolve a batch of nodes with their children.

    Everything is done with batched tensor products instead of loops; the
    easiest way to read the function is to follow the shape comments.

    Args:
        nodes: node feature vectors,
            (batch_size x max_tree_size x feature_size).
        children: child indices per node,
            (batch_size x max_tree_size x max_children).
        feature_size: size of the last axis of ``nodes``.
        w_t, w_r, w_l: the three position weight matrices, each
            (feature_size x output_size); they are stacked into a single
            (3 x feature_size x output_size) tensor.
        b_conv: bias added before the tanh (presumably (output_size,) --
            TODO confirm against the caller).

    Returns:
        tanh activations of shape (batch_size x max_tree_size x output_size).
    """
    with tf.name_scope('conv_step'):
        # nodes is shape (batch_size x max_tree_size x feature_size)
        # children is shape (batch_size x max_tree_size x max_children)
        with tf.name_scope('trees'):
            # children_vectors will have shape
            # (batch_size x max_tree_size x max_children x feature_size)
            children_vectors = children_tensor(nodes, children, feature_size)
            # add a 4th dimension to the nodes tensor
            nodes = tf.expand_dims(nodes, axis=2)
            # tree_tensor is shape
            # (batch_size x max_tree_size x max_children + 1 x feature_size)
            tree_tensor = tf.concat([nodes, children_vectors], axis=2, name='trees')

        with tf.name_scope('coefficients'):
            # coefficient tensors are shape
            # (batch_size x max_tree_size x max_children + 1)
            c_t = eta_t(children)
            c_r = eta_r(children, c_t)
            c_l = eta_l(children, c_t, c_r)
            # concatenate the position coefficients into a tensor
            # (batch_size x max_tree_size x max_children + 1 x 3)
            coef = tf.stack([c_t, c_r, c_l], axis=3, name='coef')

        with tf.name_scope('weights'):
            # stack weight matrices on top to make a weight tensor
            # (3, feature_size, output_size)
            weights = tf.stack([w_t, w_r, w_l], axis=0)

        with tf.name_scope('combine'):
            batch_size = tf.shape(children)[0]
            max_tree_size = tf.shape(children)[1]
            max_children = tf.shape(children)[2]

            # flatten batch and tree axes for a batched matrix multiplication
            x = batch_size * max_tree_size
            y = max_children + 1
            result = tf.reshape(tree_tensor, (x, y, feature_size))
            coef = tf.reshape(coef, (x, y, 3))

            # coef^T @ trees: (x, 3, y) @ (x, y, feature_size)
            #              -> (x, 3, feature_size).
            # The operand order matters: trees^T @ coef would produce
            # (x, feature_size, 3), and the reshape below would then
            # reinterpret the buffer instead of transposing it, mixing the
            # position axis with the feature axis.
            result = tf.matmul(coef, result, transpose_a=True)
            result = tf.reshape(result, (batch_size, max_tree_size, 3, feature_size))

            # contract (3, feature_size) with the weight tensor;
            # output is (batch_size, max_tree_size, output_size)
            result = tf.tensordot(result, weights, [[2, 3], [0, 1]])

            # output is (batch_size, max_tree_size, output_size)
            return tf.nn.tanh(result + b_conv, name='conv')
# Example source code for Python's tensordot()
def __init__(self, numberOfUnits, dictionarySize, maximumLength, inputFeatures = None, alwaysProvideInput = False):
    """Build an LSTM sequence model with a shared embedding/output matrix.

    ``loadingMatrix`` (numberOfUnits x dictionarySize) is used twice: its
    transpose is the embedding table for input tokens, and the matrix itself
    projects LSTM outputs back onto the dictionary.

    Two graphs are built: an unrolled one over ``maximumLength`` steps
    (``inputPlaceholder`` / ``lengthPlaceholder``) and a single-step one
    (``oneInputPlaceholder`` / ``statePlaceholders``).

    Args:
        numberOfUnits: size of the LSTM cell and of the token embeddings.
        dictionarySize: number of distinct tokens.
        maximumLength: number of time steps in the unrolled graph.
        inputFeatures: optional tensor; when given, tanh dense layers map it
            to the initial LSTM state.
        alwaysProvideInput: when true, a tanh dense projection of
            ``inputFeatures`` is added to the embedded input at every step.

    Raises:
        ValueError: if ``alwaysProvideInput`` is set without
            ``inputFeatures``.
    """
    # Fail early with a clear message; otherwise the None projection would
    # only blow up later inside tf.reshape.
    if alwaysProvideInput and inputFeatures is None:
        raise ValueError("alwaysProvideInput requires inputFeatures to be given")

    self.model = rnn.LSTMCell(numberOfUnits)
    self.loadingMatrix = tf.Variable(tf.random_uniform([numberOfUnits,dictionarySize],-1.0,1.0),name = 'LOADINGMATRIX')
    self.lengthPlaceholder = tf.placeholder(tf.int32, shape = [None],name = 'LENGTH')
    self.maximumLength = maximumLength
    self.dictionarySize = dictionarySize

    # Identity check: comparing a tensor to None with != is not a reliable
    # "was it provided" test.
    if inputFeatures is not None:
        # One dense layer per component of the LSTM state.
        self.transformedInputFeatures = [ tf.layers.dense(inputs = inputFeatures,
                                                          units = s,
                                                          activation = tf.nn.tanh)
                                          for s in self.model.state_size ]
        self.transformedInputFeatures = rnn.LSTMStateTuple(*self.transformedInputFeatures)
        if alwaysProvideInput:
            self.alwaysProvidedInput = tf.layers.dense(inputs = inputFeatures,
                                                       units = numberOfUnits,
                                                       activation = tf.nn.tanh)
        else:
            self.alwaysProvidedInput = None
    else:
        self.transformedInputFeatures = None
        self.alwaysProvidedInput = None

    # Unrolls some number of steps maximumLength
    self.inputPlaceholder = tf.placeholder(tf.int32, shape = [None,maximumLength],name = 'INPUT')
    embeddedInputs = tf.nn.embedding_lookup(tf.transpose(self.loadingMatrix),self.inputPlaceholder)
    if alwaysProvideInput:
        # alwaysProvidedInput: [None,numberOfUnits]
        # we want to duplicate it along the time axis to get [None,numberOfTimesSteps,numberOfUnits]
        alwaysProvidedInput2 = tf.reshape(self.alwaysProvidedInput,[-1,1,numberOfUnits])
        alwaysProvidedInput3 = tf.tile(alwaysProvidedInput2, [1,maximumLength,1])
        embeddedInputs = embeddedInputs + alwaysProvidedInput3
    self.outputs, self.states = tf.nn.dynamic_rnn(self.model,
                                                  inputs = embeddedInputs,
                                                  dtype = tf.float32,
                                                  sequence_length = self.lengthPlaceholder,
                                                  initial_state = self.transformedInputFeatures)
    # projectedOutputs: None x timeSteps x dictionarySize
    projectedOutputs = tf.tensordot(self.outputs, self.loadingMatrix, axes = [[2],[0]])
    self.outputDistribution = tf.nn.log_softmax(projectedOutputs)
    # `axis` replaces the deprecated `dimension` keyword of tf.argmax.
    self.hardOutputs = tf.cast(tf.argmax(projectedOutputs,axis = 2),tf.int32)

    # A small graph for running the recurrence network forward one step
    self.statePlaceholders = [ tf.placeholder(tf.float32, [None,numberOfUnits], name = 'state0'),
                               tf.placeholder(tf.float32, [None,numberOfUnits], name = 'state1')]
    self.oneInputPlaceholder = tf.placeholder(tf.int32, shape = [None], name = 'inputForOneStep')
    projectedInputs = tf.nn.embedding_lookup(tf.transpose(self.loadingMatrix),self.oneInputPlaceholder)
    if alwaysProvideInput:
        projectedInputs = projectedInputs + self.alwaysProvidedInput
    self.oneOutput, self.oneNewState = self.model(projectedInputs,
                                                  rnn.LSTMStateTuple(*self.statePlaceholders))
    # Expose the new state as a plain two-element list.
    self.oneNewState = [self.oneNewState[0],self.oneNewState[1]]
    self.oneOutputDistribution = tf.nn.log_softmax(tf.matmul(self.oneOutput, self.loadingMatrix))
# sequence prediction model with prediction fed into input
def learn_comb_orth_rmsprop(poses, dm_shape, reuse=None, _float_type=tf.float32):
    """Mix ``poses`` with a non-trainable combination matrix.

    The matrix is not trained by the regular optimizer (``trainable=False``);
    instead the returned ``update_comb_mat(grad, lr)`` builds an assign op
    that rescales the gradient by a running average of its square and then
    applies a Cayley-transform-style step.

    Args:
        poses: rank-4 tensor whose axis 2 is contracted with axis 1 of the
            combination matrix.
        dm_shape: ``(rows, cols)`` of the combination matrix. The update
            multiplies a ``rows x rows`` identity with ``cols x cols``
            products, so it presumably expects a square matrix -- TODO confirm.
        reuse: forwarded to ``tf.variable_scope``.
        _float_type: dtype of the variables and constants.

    Returns:
        Tuple ``(poses, comb_matrix_image, update_comb_mat)``: the mixed
        poses, a ``[1, rows, cols, 1]`` uint8 rendering of the matrix scaled
        to 0..255, and the update-op factory.
    """
    rows, cols = dm_shape[0], dm_shape[1]
    decay = 0.99
    epsilon = 1.e-5

    with tf.variable_scope("learn_comb", reuse=reuse):
        comb_matrix = tf.get_variable(
            "matrix",
            [rows, cols],
            initializer=identity_initializer(0),
            dtype=_float_type,
            trainable=False,
        )
        comb_matrix_m = tf.get_variable(
            "matrix_momentum",
            [rows, cols],
            initializer=tf.zeros_initializer(),
            dtype=_float_type,
            trainable=False,
        )
        tf.add_to_collection(COMB_MATRIX_COLLECTION, comb_matrix)

        # Contract poses axis 2 with matrix axis 1, then swap the last two
        # axes so the mixed axis returns to position 2.
        mixed = tf.tensordot(poses, comb_matrix, [[2], [1]])
        poses = tf.transpose(mixed, [0, 1, 3, 2])

        # Special update code
        def update_comb_mat(grad, lr):
            identity = tf.constant(np.eye(rows), dtype=_float_type)

            # Running average of the squared gradient
            squared = tf.square(grad)
            momentum_op = tf.assign(
                comb_matrix_m,
                comb_matrix_m * decay + (1 - decay) * squared,
            )

            # Everything below must be built after the running average has
            # been refreshed.
            with tf.control_dependencies([momentum_op]):
                # Matrix update
                scaled_grad = lr * grad / tf.sqrt(comb_matrix_m + epsilon)
                # G^T W - W^T G is skew-symmetric by construction.
                skew = (
                    tf.matmul(tf.transpose(scaled_grad), comb_matrix)
                    - tf.matmul(tf.transpose(comb_matrix), scaled_grad)
                )
                plus_half = identity + 0.5 * skew
                minus_half = identity - 0.5 * skew
                rotation = tf.matmul(tf.matrix_inverse(plus_half), minus_half)
                updated = tf.matmul(rotation, comb_matrix)
                return tf.assign(comb_matrix, updated)

        # Visualization: rescale the matrix to 0..255 and shape it as a
        # single one-channel image.
        cb_min = tf.reduce_min(comb_matrix)
        cb_max = tf.reduce_max(comb_matrix)
        normalized = (comb_matrix - cb_min) / (cb_max - cb_min) * 255.0
        as_bytes = tf.cast(normalized, tf.uint8)
        comb_matrix_image = tf.reshape(as_bytes, [1, rows, cols, 1])

        return poses, comb_matrix_image, update_comb_mat