def channel_wise_fc_layer(self, input, name):  # bottom: (7x7x512)
    _, width, height, n_feat_map = input.get_shape().as_list()
    input_reshape = tf.reshape(input, [-1, width * height, n_feat_map])
    input_transpose = tf.transpose(input_reshape, [2, 0, 1])

    with tf.variable_scope(name):
        W = tf.get_variable(
            "W",
            shape=[n_feat_map, width * height, width * height],  # (512, 49, 49)
            initializer=tf.random_normal_initializer(0., 0.005))
        output = tf.batch_matmul(input_transpose, W)

    output_transpose = tf.transpose(output, [1, 2, 0])
    # reshape back with the same (width, height) order that was flattened above
    output_reshape = tf.reshape(output_transpose, [-1, width, height, n_feat_map])
    return output_reshape
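For reference, tf.batch_matmul was folded into tf.matmul in TensorFlow 1.0, so on current releases the same per-channel product can be written with one batched tf.matmul. A minimal sketch of the equivalent op, assuming NHWC input and an externally created weight tensor of shape [C, H*W, H*W]:

import tensorflow as tf

def channel_wise_fc(x, weights):
    # x: [batch, H, W, C]; weights: [C, H*W, H*W], one H*W x H*W matrix per channel
    _, h, w, c = x.get_shape().as_list()
    flat = tf.transpose(tf.reshape(x, [-1, h * w, c]), [2, 0, 1])  # [C, batch, H*W]
    out = tf.matmul(flat, weights)                                 # matmul batched over C
    out = tf.transpose(out, [1, 2, 0])                             # [batch, H*W, C]
    return tf.reshape(out, [-1, h, w, c])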
Example source code for the Python batch_matmul() op
def __call__(self, u_t, a, b, scope=None):
    """
    :param u_t: [N, M, d]
    :param a: [N, M, 1]
    :param b: [N, M, 1]
    :param mask: [N, M]
    :return:
    """
    N, M, d = self.batch_size, self.mem_size, self.hidden_size
    L, sL = self.L, self.sL
    with tf.name_scope(scope or self.__class__.__name__):
        L = tf.tile(tf.expand_dims(L, 0), [N, 1, 1])
        sL = tf.tile(tf.expand_dims(sL, 0), [N, 1, 1])
        logb = tf.log(b + 1e-9)
        logb = tf.concat(1, [tf.zeros([N, 1, 1]), tf.slice(logb, [0, 1, 0], [-1, -1, -1])])
        left = L * tf.exp(tf.batch_matmul(L, logb * sL))  # [N, M, M]
        right = a * u_t  # [N, M, d]
        u = tf.batch_matmul(left, right)  # [N, M, d]
        return u
def __call__(self, u_t, a, b, scope=None):
    """
    :param u_t: [N, M, d]
    :param a: [N, M, d]
    :param b: [N, M, d]
    :param mask: [N, M]
    :return:
    """
    N, M, d = self.batch_size, self.mem_size, self.hidden_size
    L, sL = self.L, self.sL
    with tf.name_scope(scope or self.__class__.__name__):
        L = tf.tile(tf.expand_dims(tf.expand_dims(L, 0), 0), [N, d, 1, 1])
        sL = tf.tile(tf.expand_dims(tf.expand_dims(sL, 0), 0), [N, d, 1, 1])
        logb = tf.log(b + 1e-9)  # [N, M, d]
        logb = tf.concat(1, [tf.zeros([N, 1, d]), tf.slice(logb, [0, 1, 0], [-1, -1, -1])])  # [N, M, d]
        logb = tf.expand_dims(tf.transpose(logb, [0, 2, 1]), -1)  # [N, d, M, 1]
        left = L * tf.exp(tf.batch_matmul(L, logb * sL))  # [N, d, M, M]
        right = a * u_t  # [N, M, d]
        right = tf.expand_dims(tf.transpose(right, [0, 2, 1]), -1)  # [N, d, M, 1]
        u = tf.batch_matmul(left, right)  # [N, d, M, 1]
        u = tf.transpose(tf.squeeze(u, [3]), [0, 2, 1])  # [N, M, d]
        return u
def channel_wise_fc_layer(bottom, name, bias=True):
    """
    channel-wise fully connected layer
    """
    _, width, height, n_feat_map = bottom.get_shape().as_list()
    input_reshape = tf.reshape(bottom, [-1, width * height, n_feat_map])  # order='C'
    input_transpose = tf.transpose(input_reshape, [2, 0, 1])  # n_feat_map * batch * d

    with tf.variable_scope(name):
        W = tf.get_variable(
            "W",
            shape=[n_feat_map, width * height, width * height],  # n_feat_map * d * d_filter
            initializer=tf.truncated_normal_initializer(0., 0.005))
        output = tf.batch_matmul(input_transpose, W)  # n_feat_map * batch * d_filter

        if bias:
            b = tf.get_variable(
                "b",
                shape=width * height,
                initializer=tf.constant_initializer(0.))
            output = tf.nn.bias_add(output, b)

    output_transpose = tf.transpose(output, [1, 2, 0])  # batch * d_filter * n_feat_map
    output_reshape = tf.reshape(output_transpose, [-1, width, height, n_feat_map])
    return output_reshape
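A quick shape check for the layer above, as a hedged sketch assuming a graph-mode session on a TensorFlow release that still ships tf.batch_matmul (0.12 or earlier); the placeholder shape mirrors the 7x7x512 comment:

import numpy as np
import tensorflow as tf

bottom = tf.placeholder(tf.float32, [None, 7, 7, 512])
top = channel_wise_fc_layer(bottom, "cfc", bias=True)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    out = sess.run(top, {bottom: np.zeros((2, 7, 7, 512), np.float32)})
    print(out.shape)  # (2, 7, 7, 512)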
def transition(h):
    # compute A, B, o linearization matrices
    with tf.variable_scope("trans"):
        for l in range(2):
            h = ReLU(h, 100, "aggregate_loss" + str(l))
        with tf.variable_scope("A"):
            v, r = tf.split(1, 2, linear(h, z_dim * 2))
            v1 = tf.expand_dims(v, -1)  # (batch, z_dim, 1)
            rT = tf.expand_dims(r, 1)  # (batch, 1, z_dim)
            I = tf.diag([1.] * z_dim)
            A = (
                I + tf.batch_matmul(v1, rT)
            )  # (z_dim, z_dim) + (batch, z_dim, 1)*(batch, 1, z_dim) (I is broadcasted)
        with tf.variable_scope("B"):
            B = linear(h, z_dim * u_dim)
            B = tf.reshape(B, [-1, z_dim, u_dim])
        with tf.variable_scope("o"):
            o = linear(h, z_dim)
        return A, B, o, v, r
def transition(h, share=None):
    # compute A, B, o linearization matrices
    with tf.variable_scope("trans", reuse=share):
        for l in range(2):
            h = ReLU(h, 100, "aggregate_loss" + str(l))
        with tf.variable_scope("A"):
            v, r = tf.split(1, 2, linear(h, z_dim * 2))
            v1 = tf.expand_dims(v, -1)  # (batch, z_dim, 1)
            rT = tf.expand_dims(r, 1)  # (batch, 1, z_dim)
            I = tf.diag([1.] * z_dim)
            A = (I + tf.batch_matmul(v1, rT))  # (z_dim, z_dim) + (batch, z_dim, 1)*(batch, 1, z_dim) (I is broadcasted)
        with tf.variable_scope("B"):
            B = linear(h, z_dim * u_dim)
            B = tf.reshape(B, [-1, z_dim, u_dim])
        with tf.variable_scope("o"):
            o = linear(h, z_dim)
        return A, B, o, v, r
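Both transition variants build the locally linear dynamics matrix as a rank-one update A = I + v rᵀ via a batched outer product. A standalone sketch of just that construction with the current API (the batch and z_dim values are illustrative):

import tensorflow as tf

batch, z_dim = 4, 3
v = tf.random.normal([batch, z_dim])
r = tf.random.normal([batch, z_dim])
v1 = tf.expand_dims(v, -1)             # [batch, z_dim, 1]
rT = tf.expand_dims(r, 1)              # [batch, 1, z_dim]
A = tf.eye(z_dim) + tf.matmul(v1, rT)  # identity broadcast over the batch -> [batch, z_dim, z_dim]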
def spatial_transformer(U, theta, out_height, out_width):
    num_batch = tf.shape(U)[0]
    height, width, num_channels = U.get_shape()[1:]
    x_t, y_t = meshgrid(out_height, out_width)
    x_t = tf.expand_dims(x_t, 0)
    y_t = tf.expand_dims(y_t, 0)
    if theta.get_shape()[1] == 3:
        s, t_x, t_y = tf.split(1, 3, theta)
        x_s = tf.reshape(s * tf.tile(x_t, [num_batch, 1]) + t_x, [-1])
        y_s = tf.reshape(s * tf.tile(y_t, [num_batch, 1]) + t_y, [-1])
    else:
        grid = tf.expand_dims(tf.concat(0, [x_t, y_t, tf.ones_like(x_t)]), 0)
        grid = tf.tile(grid, [num_batch, 1, 1])
        grid_t = tf.batch_matmul(tf.reshape(theta, [-1, 2, 3]), grid)
        x_s = tf.reshape(tf.slice(grid_t, [0, 0, 0], [-1, 1, -1]), [-1])
        y_s = tf.reshape(tf.slice(grid_t, [0, 1, 0], [-1, 1, -1]), [-1])
    return transform(U, x_s, y_s, num_batch, out_height, out_width, num_channels)
# last layer of localization net
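When theta is a full 2x3 affine matrix, the snippet above multiplies it against a homogeneous sampling grid with one batched matmul. A self-contained sketch of that grid construction using current ops (meshgrid and transform in the original are project helpers and are not reproduced here; affine_grid is an illustrative name):

import tensorflow as tf

def affine_grid(theta, out_h, out_w):
    # theta: [batch, 2, 3]; returns normalized sampling coords of shape [batch, 2, out_h*out_w]
    batch = tf.shape(theta)[0]
    x_t, y_t = tf.meshgrid(tf.linspace(-1.0, 1.0, out_w),
                           tf.linspace(-1.0, 1.0, out_h))
    grid = tf.stack([tf.reshape(x_t, [-1]),
                     tf.reshape(y_t, [-1]),
                     tf.ones([out_h * out_w])], axis=0)     # [3, out_h*out_w]
    grid = tf.tile(tf.expand_dims(grid, 0), [batch, 1, 1])  # [batch, 3, out_h*out_w]
    return tf.matmul(theta, grid)                           # [batch, 2, out_h*out_w]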
def _define_diag_covariance_probs(self, shard_id, shard):
    """Defines the diagonal covariance probabilities per example in a class.

    Args:
      shard_id: id of the current shard.
      shard: current data shard, 1 X num_examples X dimensions.

    Returns a matrix num_examples * num_classes.
    """
    # num_classes X 1
    # TODO(xavigonzalvo): look into alternatives to log for
    # reparametrization of variance parameters.
    det_expanded = tf.reduce_sum(tf.log(self._covs + 1e-3),
                                 1, keep_dims=True)
    diff = shard - self._means
    x2 = tf.square(diff)
    cov_expanded = tf.expand_dims(1.0 / (self._covs + 1e-3), 2)
    # num_classes X num_examples
    x2_cov = tf.batch_matmul(x2, cov_expanded)
    x2_cov = tf.transpose(tf.squeeze(x2_cov, [2]))
    self._probs[shard_id] = -0.5 * (
        tf.to_float(self._dimensions) * tf.log(2.0 * np.pi) +
        tf.transpose(det_expanded) + x2_cov)
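The op above evaluates the per-class diagonal-Gaussian log-density -0.5 * (D*log(2*pi) + sum(log(cov)) + sum((x - mu)^2 / cov)). A small NumPy check of the same quantity (the function and array names are illustrative):

import numpy as np

def diag_gaussian_log_prob(x, means, covs):
    # x: [num_examples, dims]; means, covs: [num_classes, dims] -> [num_examples, num_classes]
    diff = x[None, :, :] - means[:, None, :]                       # [classes, examples, dims]
    quad = np.sum(diff ** 2 / (covs[:, None, :] + 1e-3), axis=-1)  # [classes, examples]
    log_det = np.sum(np.log(covs + 1e-3), axis=-1, keepdims=True)  # [classes, 1]
    return (-0.5 * (x.shape[1] * np.log(2 * np.pi) + log_det + quad)).T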
def _define_partial_maximization_operation(self, shard_id, shard):
    """Computes the partial statistics of the means and covariances.

    Args:
      shard_id: current shard id.
      shard: current data shard, 1 X num_examples X dimensions.
    """
    # Soft assignment of each data point to each of the two clusters.
    self._points_in_k[shard_id] = tf.reduce_sum(self._w[shard_id], 0,
                                                keep_dims=True)
    # Partial means.
    w_mul_x = tf.expand_dims(
        tf.matmul(self._w[shard_id],
                  tf.squeeze(shard, [0]), transpose_a=True), 1)
    self._w_mul_x.append(w_mul_x)
    # Partial covariances.
    x = tf.concat(0, [shard for _ in range(self._num_classes)])
    x_trans = tf.transpose(x, perm=[0, 2, 1])
    x_mul_w = tf.concat(0, [
        tf.expand_dims(x_trans[k, :, :] * self._w[shard_id][:, k], 0)
        for k in range(self._num_classes)])
    self._w_mul_x2.append(tf.batch_matmul(x_mul_w, x))
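The batched product at the end accumulates the soft-count-weighted second moments sum_n w[n, k] * x[n] x[n]^T for each class k; the same statistic written directly with einsum (names are illustrative):

import numpy as np

def weighted_second_moments(x, w):
    # x: [num_examples, dims]; w: [num_examples, num_classes] -> [num_classes, dims, dims]
    return np.einsum('nk,ni,nj->kij', w, x, x)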
def buildAttention(self):
    q_relu = self.tensors['q_relu']
    a_relu = self.tensors['a_relu']
    with tf.name_scope("attention"):
        W = identity([self.params['nb_filter'], self.params['nb_filter']], name='W')
        batch = tf.shape(q_relu)[0]
        q_matmul = tf.batch_matmul(q_relu, tf.tile(tf.expand_dims(W, [0]),
                                                   tf.pack([batch, tf.constant(1), tf.constant(1)])))
        qa_attention = tf.batch_matmul(q_matmul, a_relu, adj_x=False, adj_y=True, name="attention")
        # shape = (batch, q_length, 1)
        qa_attention = tf.tanh(qa_attention)
        q_max = tf.reduce_max(qa_attention, reduction_indices=[2], keep_dims=True, name='q_max')
        # shape = (batch, 1, a_length)
        a_max = tf.reduce_max(qa_attention, reduction_indices=[1], keep_dims=True, name='a_max')
        # shape = (batch, q_length, 1)
        q_softmax = tf.expand_dims(tf.nn.softmax(tf.squeeze(q_max, [2])), -1)
        # shape = (batch, a_length, 1)
        a_softmax = tf.expand_dims(tf.nn.softmax(tf.squeeze(a_max, [1])), -1)
        # https://www.tensorflow.org/versions/r0.9/api_docs/python/math_ops.html#batch_matmul
        # shape = (batch, NUM_FILTERS, 1)
        q_feature = tf.batch_matmul(q_relu, q_softmax, adj_x=True, adj_y=False)
        a_feature = tf.batch_matmul(a_relu, a_softmax, adj_x=True, adj_y=False)
    self.tensors['q_feature'] = q_feature
    self.tensors['a_feature'] = a_feature
    self.tensors.setdefault('weights', []).append(W)
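The adj_x/adj_y flags of tf.batch_matmul became adjoint_a/adjoint_b (or, for real tensors, transpose_a/transpose_b) on tf.matmul in TensorFlow 1.0. A sketch of the same bilinear attention matrix with the merged op (shapes are illustrative: q [batch, q_len, f], a [batch, a_len, f], W [f, f]):

import tensorflow as tf

def attention_matrix(q, a, W):
    qW = tf.einsum('bqf,fg->bqg', q, W)                 # [batch, q_len, f]
    return tf.tanh(tf.matmul(qW, a, transpose_b=True))  # [batch, q_len, a_len]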
def soft_attn(self, top_recur):
    """"""
    reuse = (self.moving_params is not None) or None
    input_size = top_recur.get_shape().as_list()[-1]
    with tf.variable_scope('MLP', reuse=reuse):
        head_mlp, dep_mlp = self.MLP(top_recur, self.info_mlp_size,
                                     func=self.info_func,
                                     keep_prob=self.info_keep_prob,
                                     n_splits=2)
    with tf.variable_scope('Arcs', reuse=reuse):
        arc_logits = self.bilinear_classifier(dep_mlp, head_mlp, keep_prob=self.info_keep_prob)
        arc_prob = self.softmax(arc_logits)
        head_lin = tf.batch_matmul(arc_prob, top_recur)
        top_recur = tf.concat(2, [top_recur, head_lin])
    top_recur.set_shape([tf.Dimension(None), tf.Dimension(None), tf.Dimension(4 * self.recur_size)])
    return top_recur
#=============================================================
def read(x, x_hat, h_dec_prev):
    """Function to implement eq 27"""
    Fx, Fy, gamma = attn_window("read", h_dec_prev, patch_read)
    # gamma in [batch_size, 1]
    # Fx in [batch_size, patch_read, 28]
    def filter_img(img, Fx, Fy, gamma, N):
        Fxt = tf.transpose(Fx, perm=[0, 2, 1])
        img = tf.reshape(img, [-1, B, A])  # in [batch_size, 28, 28]
        glimpse = tf.batch_matmul(Fy, tf.batch_matmul(img, Fxt))  # in [batch_size, patch_read, patch_read]
        glimpse = tf.reshape(glimpse, [-1, N * N])  # in [batch_size, patch_read*patch_read]
        return glimpse * tf.reshape(gamma, [-1, 1])
    x = filter_img(x, Fx, Fy, gamma, patch_read)  # batch x (patch_read*patch_read)
    x_hat = filter_img(x_hat, Fx, Fy, gamma, patch_read)
    # x in [batch_size, patch_read^2]
    # x_hat in [batch_size, patch_read^2]
    return tf.concat(1, [x, x_hat])  # concat along feature axis
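The filtered glimpse above is two batched matmuls, Fy @ img @ Fx^T, scaled by gamma. The same read expressed with the merged tf.matmul (shapes are illustrative: img [batch, B, A], Fx [batch, N, A], Fy [batch, N, B]):

import tensorflow as tf

def glimpse(img, Fx, Fy, gamma, N):
    g = tf.matmul(Fy, tf.matmul(img, Fx, transpose_b=True))  # [batch, N, N]
    return tf.reshape(g, [-1, N * N]) * tf.reshape(gamma, [-1, 1])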
def transition(h):
    # compute A, B, o linearization matrices
    with tf.variable_scope("trans"):
        for l in range(2):
            h = ReLU(h, 100, "l" + str(l))
        with tf.variable_scope("A"):
            v, r = tf.split(1, 2, linear(h, z_dim * 2))
            v1 = tf.expand_dims(v, -1)  # (batch, z_dim, 1)
            rT = tf.expand_dims(r, 1)  # (batch, 1, z_dim)
            I = tf.diag([1.] * z_dim)
            A = (I + tf.batch_matmul(v1, rT))  # (z_dim, z_dim) + (batch, z_dim, 1)*(batch, 1, z_dim) (I is broadcasted)
        with tf.variable_scope("B"):
            B = linear(h, z_dim * u_dim)
            B = tf.reshape(B, [-1, z_dim, u_dim])
        with tf.variable_scope("o"):
            o = linear(h, z_dim)
        return A, B, o, v, r
def transition(h, share=None):
    # compute A, B, o linearization matrices
    with tf.variable_scope("trans", reuse=share):
        for l in range(2):
            h = ReLU(h, 100, "l" + str(l))
        with tf.variable_scope("A"):
            v, r = tf.split(1, 2, linear(h, z_dim * 2))
            v1 = tf.expand_dims(v, -1)  # (batch, z_dim, 1)
            rT = tf.expand_dims(r, 1)  # (batch, 1, z_dim)
            I = tf.diag([1.] * z_dim)
            A = (I + tf.batch_matmul(v1, rT))  # (z_dim, z_dim) + (batch, z_dim, 1)*(batch, 1, z_dim) (I is broadcasted)
        with tf.variable_scope("B"):
            B = linear(h, z_dim * u_dim)
            B = tf.reshape(B, [-1, z_dim, u_dim])
        with tf.variable_scope("o"):
            o = linear(h, z_dim)
        return A, B, o, v, r
def address(M0, w0, head):
    # Content focusing
    # Compute cosine similarity
    key = tf.expand_dims(head["key"], 1)
    key_matches = tf.batch_matmul(key, tf.transpose(M0, [0, 2, 1]))
    key_matches = tf.squeeze(key_matches)
    key_mag = tf.expand_dims(NTMCell.magnitude(head["key"], 1), 1)
    M_col_mag = NTMCell.magnitude(M0, 2)
    cosine_sim = key_matches / (key_mag * M_col_mag)
    # Compute content weights
    wc = tf.nn.softmax(head["key_str"] * cosine_sim)
    # Location focusing
    wg = head["interp"] * wc + (1 - head["interp"]) * w0
    ws = rotate.ntm_rotate(wg, head["shift"])
    ws_pow = tf.pow(ws, head["sharp"])
    w1 = ws_pow / tf.reduce_sum(ws_pow, 1, keep_dims=True)
    return w1
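Content focusing above is a cosine similarity between the head key and every memory row, sharpened by key_str and normalized with a softmax. A standalone sketch without the NTMCell helpers, assuming shapes memory [batch, slots, width], key [batch, width], key_strength [batch, 1]:

import tensorflow as tf

def content_weights(memory, key, key_strength, eps=1e-8):
    scores = tf.squeeze(tf.matmul(tf.expand_dims(key, 1), memory, transpose_b=True), 1)  # [batch, slots]
    key_norm = tf.norm(key, axis=1, keepdims=True)  # [batch, 1]
    mem_norm = tf.norm(memory, axis=2)              # [batch, slots]
    cosine = scores / (key_norm * mem_norm + eps)
    return tf.nn.softmax(key_strength * cosine)     # [batch, slots]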
def grams(X):
    dim_ordering = K.image_dim_ordering()
    if dim_ordering == 'tf':
        X = K.permute_dimensions(X, (0, 3, 1, 2))

    (samples, c, h, w) = get_shape(X)
    X_reshaped = K.reshape(X, (-1, c, h * w))
    X_T = K.permute_dimensions(X_reshaped, (0, 2, 1))
    if K._BACKEND == 'theano':
        X_gram = T.batched_dot(X_reshaped, X_T)
    else:
        X_gram = tf.batch_matmul(X_reshaped, X_T)
    X_gram /= c * h * w

    return X_gram
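The same Gram computation for a channels-last feature map, written against the post-1.0 TensorFlow API only (no Keras backend switch); gram_matrix is an illustrative name:

import tensorflow as tf

def gram_matrix(x):
    # x: [batch, H, W, C] -> [batch, C, C]
    _, h, w, c = x.get_shape().as_list()
    feats = tf.reshape(tf.transpose(x, [0, 3, 1, 2]), [-1, c, h * w])  # [batch, C, H*W]
    return tf.matmul(feats, feats, transpose_b=True) / float(c * h * w)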
def read_attention(self, x, x_hat, h_dec_prev):
    Fx, Fy, gamma = self.attn_window("read", h_dec_prev)

    # we have the parameters for a patch of gaussian filters. apply them.
    def filter_img(img, Fx, Fy, gamma):
        Fxt = tf.transpose(Fx, perm=[0, 2, 1])
        img = tf.reshape(img, [-1, self.img_size, self.img_size])
        # Apply the gaussian patches:
        # keep in mind: horiz = imgsize = verts (they are all the image size)
        # keep in mind: attn = height/length of attention patches
        # allfilters = [attn, vert] * [imgsize, imgsize] * [horiz, attn]
        # we have batches, so the full batch_matmul equation looks like:
        # [1, 1, vert] * [batchsize, imgsize, imgsize] * [1, horiz, 1]
        glimpse = tf.batch_matmul(Fy, tf.batch_matmul(img, Fxt))
        glimpse = tf.reshape(glimpse, [-1, self.attention_n ** 2])
        # finally scale this glimpse w/ the gamma parameter
        return glimpse * tf.reshape(gamma, [-1, 1])

    x = filter_img(x, Fx, Fy, gamma)
    x_hat = filter_img(x_hat, Fx, Fy, gamma)
    return tf.concat(1, [x, x_hat])
# encode an attention patch
def write_attention(self, hidden_layer):
    with tf.variable_scope("writeW", reuse=self.share_parameters):
        w = dense(hidden_layer, self.n_hidden, self.attention_n * self.attention_n * self.num_colors)
    w = tf.reshape(w, [self.batch_size, self.attention_n, self.attention_n, self.num_colors])
    w_t = tf.transpose(w, perm=[3, 0, 1, 2])
    Fx, Fy, gamma = self.attn_window("write", hidden_layer)
    # color1, color2, color3, color1, color2, color3, etc.
    w_array = tf.reshape(w_t, [self.num_colors * self.batch_size, self.attention_n, self.attention_n])
    Fx_array = tf.concat(0, [Fx, Fx, Fx])
    Fy_array = tf.concat(0, [Fy, Fy, Fy])
    Fyt = tf.transpose(Fy_array, perm=[0, 2, 1])
    # [vert, attn_n] * [attn_n, attn_n] * [attn_n, horiz]
    wr = tf.batch_matmul(Fyt, tf.batch_matmul(w_array, Fx_array))
    sep_colors = tf.reshape(wr, [self.batch_size, self.num_colors, self.img_size ** 2])
    wr = tf.reshape(wr, [self.num_colors, self.batch_size, self.img_size, self.img_size])
    wr = tf.transpose(wr, [1, 2, 3, 0])
    wr = tf.reshape(wr, [self.batch_size, self.img_size * self.img_size * self.num_colors])
    return wr * tf.reshape(1.0 / gamma, [-1, 1])
def test_MatMul(self):
    t = tf.matmul(*self.random((4, 3), (3, 5)), transpose_a=False, transpose_b=False)
    self.check(t)
    t = tf.matmul(*self.random((3, 4), (3, 5)), transpose_a=True, transpose_b=False)
    self.check(t)
    t = tf.matmul(*self.random((4, 3), (5, 3)), transpose_a=False, transpose_b=True)
    self.check(t)
    t = tf.matmul(*self.random((3, 4), (5, 3)), transpose_a=True, transpose_b=True)
    self.check(t)

# def test_BatchMatMul(self):
#     t = tf.batch_matmul(*self.random((2, 4, 4, 3), (2, 4, 3, 5)), adj_x=False, adj_y=False)
#     self.check(t)
#     t = tf.batch_matmul(*self.random((2, 4, 3, 4), (2, 4, 3, 5)), adj_x=True, adj_y=False)
#     self.check(t)
#     t = tf.batch_matmul(*self.random((2, 4, 4, 3), (2, 4, 5, 3)), adj_x=False, adj_y=True)
#     self.check(t)
#     t = tf.batch_matmul(*self.random((2, 4, 3, 4), (2, 4, 5, 3)), adj_x=True, adj_y=True)
#     self.check(t)
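On TensorFlow 1.0+ the commented-out cases map directly onto tf.matmul, which handles the leading batch dimensions and exposes adjoint_a/adjoint_b in place of adj_x/adj_y. A sketch of the same checks, assuming the surrounding test class's random and check helpers:

def test_BatchMatMul_v1(self):
    t = tf.matmul(*self.random((2, 4, 4, 3), (2, 4, 3, 5)), adjoint_a=False, adjoint_b=False)
    self.check(t)
    t = tf.matmul(*self.random((2, 4, 3, 4), (2, 4, 3, 5)), adjoint_a=True, adjoint_b=False)
    self.check(t)
    t = tf.matmul(*self.random((2, 4, 4, 3), (2, 4, 5, 3)), adjoint_a=False, adjoint_b=True)
    self.check(t)
    t = tf.matmul(*self.random((2, 4, 3, 4), (2, 4, 5, 3)), adjoint_a=True, adjoint_b=True)
    self.check(t)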
def decode(self, input):
    # returns a decoder
    hidden = tf.matmul(input, self.weights["decoder1_weights"]) + self.weights["decoder1_biases"]
    hidden_relu = tf.nn.relu(hidden)

    # output is encoding_size x 1 x small_encoding_size
    # multiheaded_hidden = tf.matmul(input, self.weights["multiheaded1_weights"]) + self.weights["multiheaded1_biases"]
    # multiheaded_hidden = tf.reshape(multiheaded_hidden, [-1, self.arch_params['output_dim'], 1, self.arch_params['small_encoding_dim']])
    # multiheaded_hidden = tf.nn.relu(multiheaded_hidden)
    #
    # h = tf.scan(lambda a, x: tf.batch_matmul(x, self.weights["multiheaded2_weights"]), multiheaded_hidden,
    #             initializer=tf.Variable(tf.constant(0.0, shape=[self.arch_params['output_dim'], 1, 1])))
    # multiheaded_output = h + self.weights["multiheaded2_biases"]
    # output1 = tf.reshape(multiheaded_output, [-1, self.arch_params['output_dim']])

    output1 = tf.matmul(hidden_relu, self.weights["decoder2_weights"]) + self.weights["decoder2_biases"]
    output = output1
    return output