def trainable_initial_state(self, batch_size):
    """
    Create a trainable initial state for the BasicLSTMCell
    :param batch_size: number of samples per batch
    :return: LSTMStateTuple
    """
    def _create_initial_state(batch_size, state_size, trainable=True, initializer=tf.random_normal_initializer()):
        with tf.device('/cpu:0'):
            s = tf.get_variable('initial_state', shape=[1, state_size], dtype=tf.float32, trainable=trainable,
                                initializer=initializer)
            state = tf.tile(s, tf.stack([batch_size] + [1]))
        return state

    with tf.variable_scope('initial_c'):
        initial_c = _create_initial_state(batch_size, self._num_units)
    with tf.variable_scope('initial_h'):
        initial_h = _create_initial_state(batch_size, self._num_units)
    return tf.contrib.rnn.LSTMStateTuple(initial_c, initial_h)
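# A minimal usage sketch (TensorFlow 1.x graph mode assumed). `MyLSTMCell` is a
# hypothetical BasicLSTMCell subclass that exposes the method above; all shapes are illustrative.
import tensorflow as tf

cell = MyLSTMCell(num_units=128)                           # hypothetical cell class
inputs = tf.placeholder(tf.float32, [32, 20, 64])          # [batch, time, features]
init_state = cell.trainable_initial_state(batch_size=32)   # learned state, tiled over the batch
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, initial_state=init_state)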
# Example source code using tf.random_normal_initializer()
def deconv2d(input_, output_shape,
             k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02,
             name="deconv2d", with_w=False):
    with tf.variable_scope(name):
        # filter : [height, width, output_channels, in_channels]
        w = tf.get_variable('w', [k_h, k_w, output_shape[-1], input_.get_shape()[-1]],
                            initializer=tf.random_normal_initializer(stddev=stddev))
        tf_output_shape = tf.stack(output_shape)
        deconv = tf.nn.conv2d_transpose(input_, w, output_shape=tf_output_shape,
                                        strides=[1, d_h, d_w, 1])
        biases = tf.get_variable('biases', [output_shape[-1]], initializer=tf.constant_initializer(0.0))
        # deconv = tf.reshape(tf.nn.bias_add(deconv, biases), deconv.get_shape())
        deconv = tf.reshape(tf.nn.bias_add(deconv, biases), tf_output_shape)

        if with_w:
            return deconv, w, biases
        else:
            return deconv
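# A minimal usage sketch for the deconv2d helper above; the shapes and scope name are
# illustrative only (e.g. upsampling an 8x8x256 feature map inside a generator).
feat = tf.placeholder(tf.float32, [64, 8, 8, 256])
upsampled = deconv2d(feat, output_shape=[64, 16, 16, 128], name="g_deconv1")  # -> [64, 16, 16, 128]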
def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False):
    shape = input_.get_shape().as_list()
    # mat_shape = tf.stack([tf.shape(input_)[1], output_size])
    mat_shape = [shape[1], output_size]
    with tf.variable_scope(scope or "Linear"):
        # matrix = tf.get_variable("Matrix", [shape[1], output_size], tf.float32,
        matrix = tf.get_variable("Matrix", mat_shape, tf.float32,
                                 tf.random_normal_initializer(stddev=stddev))
        bias = tf.get_variable("bias", [output_size],
                               initializer=tf.constant_initializer(bias_start))
        if with_w:
            return tf.matmul(input_, matrix) + bias, matrix, bias
        else:
            return tf.matmul(input_, matrix) + bias
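# A hedged sketch chaining the two helpers above in a DCGAN-style generator stem;
# every size here is made up for illustration.
z = tf.placeholder(tf.float32, [64, 100])
h0 = linear(z, 4 * 4 * 512, scope="g_h0")                  # project the latent code
h0 = tf.reshape(h0, [64, 4, 4, 512])
h1 = deconv2d(tf.nn.relu(h0), [64, 8, 8, 256], name="g_h1")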
# Minibatch discrimination method that improves on the OpenAI version
# because it does not fix the batch size.
# TODO: recheck when not sleepy
def __call__(self, x, train=True):
    shape = x.get_shape().as_list()

    if train:
        with tf.variable_scope(self.name) as scope:
            self.beta = tf.get_variable("beta", [shape[-1]],
                                        initializer=tf.constant_initializer(0.))
            self.gamma = tf.get_variable("gamma", [shape[-1]],
                                         initializer=tf.random_normal_initializer(1., 0.02))

            batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
            ema_apply_op = self.ema.apply([batch_mean, batch_var])
            self.ema_mean, self.ema_var = self.ema.average(batch_mean), self.ema.average(batch_var)

            with tf.control_dependencies([ema_apply_op]):
                mean, var = tf.identity(batch_mean), tf.identity(batch_var)
    else:
        mean, var = self.ema_mean, self.ema_var

    normed = tf.nn.batch_norm_with_global_normalization(
        x, mean, var, self.beta, self.gamma, self.epsilon, scale_after_normalization=True)
    return normed
# standard convolution layer
def Minibatch_Discriminator(input, num_kernels=100, dim_per_kernel=5, init=False, name='MD'):
    # df_dim and batchsize are module-level globals in the original project;
    # tf.mul / tf.sub / tf.concat(dim, values) follow the pre-1.0 TensorFlow API.
    num_inputs = df_dim * 4
    theta = tf.get_variable(name + "/theta", [num_inputs, num_kernels, dim_per_kernel],
                            initializer=tf.random_normal_initializer(stddev=0.05))
    log_weight_scale = tf.get_variable(name + "/lws", [num_kernels, dim_per_kernel],
                                       initializer=tf.constant_initializer(0.0))
    W = tf.mul(theta, tf.expand_dims(tf.exp(log_weight_scale) / tf.sqrt(tf.reduce_sum(tf.square(theta), 0)), 0))
    W = tf.reshape(W, [-1, num_kernels * dim_per_kernel])
    x = input
    x = tf.reshape(x, [batchsize, num_inputs])
    activation = tf.matmul(x, W)
    activation = tf.reshape(activation, [-1, num_kernels, dim_per_kernel])
    abs_dif = tf.mul(tf.reduce_sum(tf.abs(tf.sub(tf.expand_dims(activation, 3),
                                                 tf.expand_dims(tf.transpose(activation, [1, 2, 0]), 0))), 2),
                     1 - tf.expand_dims(tf.constant(np.eye(batchsize), dtype=np.float32), 1))
    f = tf.reduce_sum(tf.exp(-abs_dif), 2) / tf.reduce_sum(tf.exp(-abs_dif))
    print(f.get_shape())
    print(input.get_shape())
    return tf.concat(1, [x, f])
# From a2_attention_between_enc_dec.py (project: text_classification, author: brightmart)
def __init__(self, d_model, d_k, d_v, sequence_length, h, batch_size, Q, K_s, layer_index,
             decoder_sent_length, type="attention", mask=None, dropout_keep_prob=None):
    """
    :param d_model:
    :param d_k:
    :param d_v:
    :param sequence_length:
    :param h:
    :param batch_size:
    :param Q: value from decoder
    :param K_s: output of encoder
    """
    super(AttentionEncoderDecoder, self).__init__(d_model, d_k, d_v, sequence_length, h, batch_size)
    self.Q = Q
    self.K_s = K_s
    self.layer_index = layer_index
    self.type = type
    self.decoder_sent_length = decoder_sent_length
    self.initializer = tf.random_normal_initializer(stddev=0.1)
    self.mask = mask
    self.dropout_keep_prob = dropout_keep_prob
def __init__(self, d_model, d_k, d_v, sequence_length, h, batch_size, num_layer, Q, K_s,
             type='encoder', mask=None, dropout_keep_prob=None, use_residual_conn=True):
    """
    :param d_model:
    :param d_k:
    :param d_v:
    :param sequence_length:
    :param h:
    :param batch_size:
    :param embedded_words: shape:[batch_size*sequence_length,embed_size]
    """
    super(Encoder, self).__init__(d_model, d_k, d_v, sequence_length, h, batch_size, num_layer=num_layer)
    self.Q = Q
    self.K_s = K_s
    self.type = type
    self.mask = mask
    self.initializer = tf.random_normal_initializer(stddev=0.1)
    self.dropout_keep_prob = dropout_keep_prob
    self.use_residual_conn = use_residual_conn
def init():
    # 1. assign values to fields
    vocab_size = 1000
    d_model = 512
    d_k = 64
    d_v = 64
    sequence_length = 5 * 10
    h = 8
    batch_size = 4 * 32
    initializer = tf.random_normal_initializer(stddev=0.1)

    # 2. set values for Q, K, V
    embed_size = d_model
    Embedding = tf.get_variable("Embedding_E", shape=[vocab_size, embed_size], initializer=initializer)
    input_x = tf.placeholder(tf.int32, [batch_size, sequence_length], name="input_x")  # [batch_size, sequence_length]
    print("input_x:", input_x)
    embedded_words = tf.nn.embedding_lookup(Embedding, input_x)  # [batch_size, sequence_length, embed_size]
    Q = embedded_words
    K_s = embedded_words
    num_layer = 6
    mask = get_mask(batch_size, sequence_length)

    # 3. get class object
    encoder_class = Encoder(d_model, d_k, d_v, sequence_length, h, batch_size, num_layer, Q, K_s, mask=mask)
    return encoder_class, Q, K_s
def deconv2d(input_, output_shape, k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02,
             name="deconv2d", with_w=False):
    with tf.variable_scope(name):
        # filter : [height, width, output_channels, in_channels]
        # w = tf.get_variable('w', [k_h, k_w, output_shape[-1], input_.get_shape()[-1]],
        #                     initializer=tf.random_normal_initializer(stddev=stddev))
        w = tf.get_variable('w', [k_h, k_w, output_shape[-1], input_.get_shape()[-1]],
                            initializer=tf.contrib.layers.xavier_initializer())
        try:
            deconv = tf.nn.conv2d_transpose(input_, w, output_shape=output_shape,
                                            strides=[1, d_h, d_w, 1])
        # Support for versions of TensorFlow before 0.7.0
        except AttributeError:
            deconv = tf.nn.deconv2d(input_, w, output_shape=output_shape,
                                    strides=[1, d_h, d_w, 1])

        biases = tf.get_variable('biases', [output_shape[-1]], initializer=tf.constant_initializer(0.0))
        deconv = tf.reshape(tf.nn.bias_add(deconv, biases), deconv.get_shape())

        if with_w:
            return deconv, w, biases
        else:
            return deconv
def __call__(self, input_layer, output_shape,
             k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02,
             name="deconv2d"):
    output_shape[0] = input_layer.shape[0]
    ts_output_shape = tf.pack(output_shape)
    with tf.variable_scope(name):
        # filter : [height, width, output_channels, in_channels]
        w = self.variable('w', [k_h, k_w, output_shape[-1], input_layer.shape[-1]],
                          init=tf.random_normal_initializer(stddev=stddev))
        try:
            deconv = tf.nn.conv2d_transpose(input_layer, w,
                                            output_shape=ts_output_shape,
                                            strides=[1, d_h, d_w, 1])
        # Support for versions of TensorFlow before 0.7.0
        except AttributeError:
            deconv = tf.nn.deconv2d(input_layer, w, output_shape=ts_output_shape,
                                    strides=[1, d_h, d_w, 1])

        # biases = self.variable('biases', [output_shape[-1]], init=tf.constant_initializer(0.0))
        # deconv = tf.reshape(tf.nn.bias_add(deconv, biases), [-1] + output_shape[1:])
        deconv = tf.reshape(deconv, [-1] + output_shape[1:])
        return deconv
def __call__(self, input_layer, output_size, scope=None, in_dim=None, stddev=0.02, bias_start=0.0):
    shape = input_layer.shape
    input_ = input_layer.tensor
    try:
        if len(shape) == 4:
            input_ = tf.reshape(input_, tf.pack([tf.shape(input_)[0], np.prod(shape[1:])]))
            input_.set_shape([None, np.prod(shape[1:])])
            shape = input_.get_shape().as_list()

        with tf.variable_scope(scope or "Linear"):
            matrix = self.variable("Matrix", [in_dim or shape[1], output_size], dt=tf.float32,
                                   init=tf.random_normal_initializer(stddev=stddev))
            bias = self.variable("bias", [output_size], init=tf.constant_initializer(bias_start))
            return input_layer.with_tensor(tf.matmul(input_, matrix) + bias, parameters=self.vars)
    except Exception:
        import ipdb; ipdb.set_trace()
def _create_fully_connected(self, prev_layer, num_neurones, layer_name, save_vars=False):
    with tf.variable_scope(layer_name) as scope:
        try:
            # 4-D input (conv feature map): flatten spatial dims and channels
            b, x, y, z = prev_layer.get_shape().as_list()
            flat_size = x * y * z
        except ValueError:
            # already flat: [batch, features]
            flat_size = prev_layer.get_shape().as_list()[1]
        flat = tf.reshape(prev_layer, shape=[-1, flat_size])
        w = tf.get_variable(name="weights", shape=[flat_size, num_neurones],
                            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable(name="biases", shape=[num_neurones],
                            initializer=tf.random_normal_initializer())
        out = tf.matmul(flat, w) + b
        full = tf.nn.relu(out)
        if save_vars:
            self.var_list += [w, b]
        return full
# From ops.py (project: Unsupervised-Anomaly-Detection-with-Generative-Adversarial-Networks, author: xtarx)
def deconv2d(input_, output_shape,
             k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02,
             name="deconv2d", with_w=False):
    with tf.variable_scope(name):
        # filter : [height, width, output_channels, in_channels]
        w = tf.get_variable('w', [k_h, k_w, output_shape[-1], input_.get_shape()[-1]],
                            initializer=tf.random_normal_initializer(stddev=stddev))
        try:
            deconv = tf.nn.conv2d_transpose(input_, w, output_shape=output_shape,
                                            strides=[1, d_h, d_w, 1])
        # Support for versions of TensorFlow before 0.7.0
        except AttributeError:
            deconv = tf.nn.deconv2d(input_, w, output_shape=output_shape,
                                    strides=[1, d_h, d_w, 1])

        biases = tf.get_variable('biases', [output_shape[-1]], initializer=tf.constant_initializer(0.0))
        deconv = tf.reshape(tf.nn.bias_add(deconv, biases), deconv.get_shape())

        if with_w:
            return deconv, w, biases
        else:
            return deconv
def new_fc_layer(self, bottom, output_size, name):
    shape = bottom.get_shape().as_list()
    dim = np.prod(shape[1:])
    x = tf.reshape(bottom, [-1, dim])
    input_size = dim

    with tf.variable_scope(name):
        w = tf.get_variable(
            "W",
            shape=[input_size, output_size],
            initializer=tf.random_normal_initializer(0., 0.005))
        b = tf.get_variable(
            "b",
            shape=[output_size],
            initializer=tf.constant_initializer(0.))
        fc = tf.nn.bias_add(tf.matmul(x, w), b)
    return fc
def channel_wise_fc_layer(self, input, name):  # bottom: (7x7x512)
    _, width, height, n_feat_map = input.get_shape().as_list()
    input_reshape = tf.reshape(input, [-1, width * height, n_feat_map])
    input_transpose = tf.transpose(input_reshape, [2, 0, 1])

    with tf.variable_scope(name):
        W = tf.get_variable(
            "W",
            shape=[n_feat_map, width * height, width * height],  # (512, 49, 49)
            initializer=tf.random_normal_initializer(0., 0.005))
        output = tf.batch_matmul(input_transpose, W)

        output_transpose = tf.transpose(output, [1, 2, 0])
        output_reshape = tf.reshape(output_transpose, [-1, height, width, n_feat_map])
    return output_reshape
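# A minimal usage sketch: the channel-wise fully connected layer above mixes spatial
# positions independently within each channel. `model` stands for a hypothetical instance
# of the class defining the method; the feature-map shape follows the (7x7x512) comment.
feat = tf.placeholder(tf.float32, [None, 7, 7, 512])
mixed = model.channel_wise_fc_layer(feat, name="chanwise_fc")   # -> [None, 7, 7, 512]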
def linear(input, output_dim, scope=None, stddev=None):
    if stddev:
        norm = tf.random_normal_initializer(stddev=stddev)
    else:
        norm = tf.random_normal_initializer(
            stddev=np.sqrt(2.0 / input.get_shape()[1].value)
        )
    const = tf.constant_initializer(0.0)
    with tf.variable_scope(scope or 'linear'):
        w = tf.get_variable(
            'w',
            [input.get_shape()[1], output_dim],
            initializer=norm
        )
        b = tf.get_variable('b', [output_dim], initializer=const)
        return tf.matmul(input, w) + b
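# A minimal usage sketch: when stddev is omitted, the weights above are drawn with
# stddev = sqrt(2 / fan_in) (He-style scaling for ReLU layers). Sizes are illustrative.
import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 784])
h = tf.nn.relu(linear(x, 256, scope='fc1'))          # fc1 weights drawn with stddev sqrt(2/784)
logits = linear(h, 10, scope='fc2', stddev=0.02)     # fixed stddev when passed explicitly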
def batch_norm(x, n_out, phase_train, scope='bn', decay=0.9, eps=1e-5, stddev=0.02):
    """
    Code taken from http://stackoverflow.com/a/34634291/2267819
    """
    with tf.variable_scope(scope):
        beta = tf.get_variable(name='beta', shape=[n_out], initializer=tf.constant_initializer(0.0),
                               trainable=True)
        gamma = tf.get_variable(name='gamma', shape=[n_out], initializer=tf.random_normal_initializer(1.0, stddev),
                                trainable=True)
        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=decay)

        def mean_var_with_update():
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = tf.cond(phase_train,
                            mean_var_with_update,
                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, eps)
    return normed
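# A minimal usage sketch for batch_norm above: phase_train selects batch statistics
# during training and the exponential moving averages at inference. Shapes are illustrative.
phase_train = tf.placeholder(tf.bool, name='phase_train')
conv_out = tf.placeholder(tf.float32, [None, 32, 32, 64])
bn_out = tf.nn.relu(batch_norm(conv_out, n_out=64, phase_train=phase_train))
# feed_dict={phase_train: True} while training, {phase_train: False} when evaluating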
def __init__(self, day, learning_rate=1e-2):
    self.graph = tf.Graph()
    with self.graph.as_default():
        self.x_predict = tf.placeholder("float", [None, _feature_length])
        self.y_ = tf.placeholder("float", [None, 1])

        # layer fc 1
        w_1 = tf.get_variable('all/w_1', [_feature_length, ],
                              initializer=tf.random_normal_initializer())
        # zoom layer
        w_zoom = tf.get_variable('all/w_zoom', [1, ],
                                 initializer=tf.random_normal_initializer())
        # 0.8~1.2
        self.zoom = tf.nn.sigmoid(w_zoom) * 0.4 + 0.8
        self.percent = tf.nn.softmax(w_1) * self.zoom
        self.y_p = tf.reduce_sum(self.x_predict * self.percent, 1)
        self.y_p = tf.reshape(self.y_p, [-1, 1])

        self.error_rate = tf.reduce_mean(tf.abs(self.y_ - self.y_p) / self.y_)
        self.mse = tf.reduce_mean(tf.abs(self.y_ - self.y_p))  # mean absolute error, despite the name
        # self.mse = self.error_rate
        self.optimizer = tf.train.AdamOptimizer(learning_rate)
        self.train_step = self.optimizer.minimize(self.mse)

        self.sess = tf.Session(graph=self.graph)
        self.sess.run(tf.global_variables_initializer())
def _network(self):
    # with tf.variable_scope(scope):
    w_init = tf.random_normal_initializer(0., .1)

    # actor part
    # returns mu & sigma to parameterize the action normal distribution
    scope_var = "actor"
    mu, sigma = net_frame.mlp_frame([200], self.state, self.action_dim, scope_var,
                                    activation_fn=tf.nn.relu6, w_init=w_init, activation_fn_v=tf.nn.tanh,
                                    activation_fn_a=tf.nn.softplus, continu=True)

    # critic part
    # returns the value of the state
    scope_var = "critic"
    v = net_frame.mlp_frame([100], self.state, 1, scope_var, activation_fn=tf.nn.relu6)
    return mu, sigma, v

# ===============================================================
# DDPG Agent
# ===============================================================
def gated_resnet(x, a=None, h=None, nonlinearity=concat_elu, conv=conv2d, init=False, counters={}, ema=None, dropout_p=0., **kwargs):
    xs = int_shape(x)
    num_filters = xs[-1]

    c1 = conv(nonlinearity(x), num_filters)
    if a is not None:  # add short-cut connection if auxiliary input 'a' is given
        c1 += nin(nonlinearity(a), num_filters)
    c1 = nonlinearity(c1)
    if dropout_p > 0:
        c1 = tf.nn.dropout(c1, keep_prob=1. - dropout_p)
    c2 = conv(c1, num_filters * 2, init_scale=0.1)

    # add projection of h vector if included: conditional generation
    if h is not None:
        with tf.variable_scope(get_name('conditional_weights', counters)):
            hw = get_var_maybe_avg('hw', ema, shape=[int_shape(h)[-1], 2 * num_filters], dtype=tf.float32,
                                   initializer=tf.random_normal_initializer(0, 0.05), trainable=True)
        if init:
            hw = hw.initialized_value()
        c2 += tf.reshape(tf.matmul(h, hw), [xs[0], 1, 1, 2 * num_filters])

    # Is this 3,2 or 2,3 ?
    a, b = tf.split(c2, 2, 3)
    c3 = a * tf.nn.sigmoid(b)
    return x + c3
def prelu(self):
    def _prelu(_x):
        orig_shape = self.shape(_x)
        _x = tf.reshape(_x, [orig_shape[0], -1])

        with tf.variable_scope(self.generate_name(), reuse=self._reuse):
            alphas = tf.get_variable('prelu',
                                     _x.get_shape()[-1],
                                     initializer=tf.random_normal_initializer(mean=0.0, stddev=0.01),
                                     dtype=tf.float32)
            pos = tf.nn.relu(_x)
            neg = alphas * (_x - abs(_x)) * 0.5

        self.add_weights(alphas)
        return tf.reshape(pos + neg, orig_shape)

    return _prelu
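# A hedged usage sketch: `ops` is a hypothetical instance of the class defining prelu()
# above; the returned closure is applied like any activation function. `net` is illustrative.
prelu_act = ops.prelu()
net = prelu_act(net)   # element-wise PReLU with learned per-feature alphas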
def __init__(self, isize, hsize, msize, asize, max_len, rnn_class, **kwargs):
    super(Decoder, self).__init__()
    self.name = kwargs.get('name', self.__class__.__name__)
    self.scope = kwargs.get('scope', self.name)
    self.epsilon = tf.Variable(kwargs.get('epsilon', 1.0), trainable=False)
    self.isize = isize
    self.hsize = hsize
    self.msize = msize
    self.asize = asize
    self.max_len = max_len
    self.num_layer = kwargs.get('num_layer', 1)
    self.rnn_cell = tf.nn.rnn_cell.MultiRNNCell([rnn_class(num_units=self.hsize)] * self.num_layer)
    self.weight_intializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
def __init__(self, vsize, esize, hsize, rnn_class, **kwargs):
    super(Encoder, self).__init__()
    self.name = kwargs.get('name', self.__class__.__name__)
    self.scope = kwargs.get('scope', self.name)
    self.vsize = vsize  # vocabulary size
    self.esize = esize  # embedding size
    self.hsize = hsize  # hidden size
    self.num_layer = kwargs.get('num_layer', 1)
    self.rnn_cell_fw = [rnn_class(num_units=self.hsize)] * self.num_layer
    self.rnn_cell_bw = [rnn_class(num_units=self.hsize)] * self.num_layer
    self.embed_initializer = tf.random_normal_initializer(mean=0.0, stddev=1.)
def call(self, step_inputs, state, scope=None, initialization='gaussian'):
    """
    Make one step of ISAN transition.

    Args:
        step_inputs: one-hot encoded inputs, shape bs x n
        state: previous hidden state, shape bs x d
        scope: current scope
        initialization: how to initialize the transition matrices:
            orthogonal: usually speeds up training, orthogonalize Gaussian matrices
            gaussian: sample Gaussian matrices with a sensible scale
    """
    d = self._num_units
    n = step_inputs.shape[1].value

    if initialization == 'orthogonal':
        wx_ndd_init = np.zeros((n, d * d), dtype=np.float32)
        for i in range(n):
            wx_ndd_init[i, :] = orth(np.random.randn(d, d)).astype(np.float32).ravel()
        wx_ndd_initializer = tf.constant_initializer(wx_ndd_init)
    elif initialization == 'gaussian':
        wx_ndd_initializer = tf.random_normal_initializer(stddev=1.0 / np.sqrt(d))
    else:
        raise Exception('Unknown init type: %s' % initialization)

    wx_ndd = tf.get_variable('Wx', shape=[n, d * d],
                             initializer=wx_ndd_initializer)
    bx_nd = tf.get_variable('bx', shape=[n, d],
                            initializer=tf.zeros_initializer())

    # Multiplication with a 1-hot is just row selection.
    # As of Jan '17 this is faster than doing gather.
    Wx_bdd = tf.reshape(tf.matmul(step_inputs, wx_ndd), [-1, d, d])
    bx_bd = tf.reshape(tf.matmul(step_inputs, bx_nd), [-1, 1, d])

    # Reshape the state so that matmul multiplies different matrices
    # for each batch element.
    single_state = tf.reshape(state, [-1, 1, d])
    new_state = tf.reshape(tf.matmul(single_state, Wx_bdd) + bx_bd, [-1, d])
    return new_state, new_state
def encode(self, sequence, sequence_length):
    """Encodes input sequences into a MultivariateNormalDiag distribution."""
    hparams = self.hparams
    z_size = hparams.z_size

    sequence = tf.to_float(sequence)
    encoder_output = self.encoder.encode(sequence, sequence_length)

    mu = tf.layers.dense(
        encoder_output,
        z_size,
        name='encoder/mu',
        kernel_initializer=tf.random_normal_initializer(stddev=0.001))
    sigma = tf.layers.dense(
        encoder_output,
        z_size,
        activation=tf.nn.softplus,
        name='encoder/sigma',
        kernel_initializer=tf.random_normal_initializer(stddev=0.001))

    return ds.MultivariateNormalDiag(loc=mu, scale_diag=sigma)
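# A hedged follow-up sketch: drawing a latent sample from the returned distribution and
# computing a KL term against a standard-normal prior (VAE-style training). `model`, `seq`,
# and `seq_len` are hypothetical placeholders; `ds` is the distributions module used above.
q_z = model.encode(seq, seq_len)                      # MultivariateNormalDiag posterior
z = q_z.sample()                                      # reparameterized draw, [batch, z_size]
p_z = ds.MultivariateNormalDiag(loc=tf.zeros_like(q_z.loc), scale_diag=tf.ones_like(q_z.loc))
kl = ds.kl_divergence(q_z, p_z)                       # per-example KL, shape [batch]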