def inference(self):
"""main computation graph here: 1. embeddding layer, 2.Bi-LSTM layer, 3.concat, 4.FC layer 5.softmax """
#1.get emebedding of words in the sentence
self.embedded_words = tf.nn.embedding_lookup(self.Embedding,self.input_x) #shape:[None,sentence_length,embed_size]
#2. Bi-lstm layer
# define LSTM cells: get LSTM cell output
lstm_fw_cell=rnn.BasicLSTMCell(self.hidden_size) #forward direction cell
lstm_bw_cell=rnn.BasicLSTMCell(self.hidden_size) #backward direction cell
if self.dropout_keep_prob is not None:
lstm_fw_cell=rnn.DropoutWrapper(lstm_fw_cell,output_keep_prob=self.dropout_keep_prob)
lstm_bw_cell=rnn.DropoutWrapper(lstm_bw_cell,output_keep_prob=self.dropout_keep_prob)
# bidirectional_dynamic_rnn: input: [batch_size, max_time, input_size]
# output: A tuple (outputs, output_states)
# where:outputs: A tuple (output_fw, output_bw) containing the forward and the backward rnn output `Tensor`.
outputs,_=tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,lstm_bw_cell,self.embedded_words,dtype=tf.float32) #[batch_size,sequence_length,hidden_size] #creates a dynamic bidirectional recurrent neural network
print("outputs:===>",outputs) #outputs:(<tf.Tensor 'bidirectional_rnn/fw/fw/transpose:0' shape=(?, 5, 100) dtype=float32>, <tf.Tensor 'ReverseV2:0' shape=(?, 5, 100) dtype=float32>))
#3. concat output
output_rnn=tf.concat(outputs,axis=2) #[batch_size,sequence_length,hidden_size*2]
self.output_rnn_last=tf.reduce_mean(output_rnn,axis=1) #[batch_size,hidden_size*2] #output_rnn_last=output_rnn[:,-1,:] ##[batch_size,hidden_size*2] #TODO
print("output_rnn_last:", self.output_rnn_last) # <tf.Tensor 'strided_slice:0' shape=(?, 200) dtype=float32>
#4. logits(use linear layer)
with tf.name_scope("output"): #inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network.
logits = tf.matmul(self.output_rnn_last, self.W_projection) + self.b_projection # [batch_size,num_classes]
return logits
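Below is a minimal, self-contained sketch of the same embedding -> Bi-LSTM -> mean-pooling -> projection pattern; the sizes and variable names are illustrative placeholders, not values taken from the model above.
import tensorflow as tf
from tensorflow.contrib import rnn

vocab_size, embed_size, hidden_size, num_classes, sentence_length = 1000, 128, 100, 5, 30
input_x = tf.placeholder(tf.int32, [None, sentence_length])
Embedding = tf.get_variable("Embedding", [vocab_size, embed_size])
embedded_words = tf.nn.embedding_lookup(Embedding, input_x)        # [batch, sentence_length, embed_size]
fw_cell = rnn.BasicLSTMCell(hidden_size)
bw_cell = rnn.BasicLSTMCell(hidden_size)
outputs, _ = tf.nn.bidirectional_dynamic_rnn(fw_cell, bw_cell, embedded_words, dtype=tf.float32)
features = tf.reduce_mean(tf.concat(outputs, axis=2), axis=1)      # [batch, hidden_size*2]
W_projection = tf.get_variable("W_projection", [hidden_size * 2, num_classes])
b_projection = tf.get_variable("b_projection", [num_classes])
logits = tf.matmul(features, W_projection) + b_projection          # [batch, num_classes]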
def dense(inputs, units, bias_shape, w_i, b_i=None, activation=tf.nn.relu):
# Alternatively, tf.layers.dense combined with tf.contrib.layers.flatten can do the same thing:
# dense1 = tf.layers.dense(tf.contrib.layers.flatten(relu5), activation=tf.nn.relu, units=50)
if not isinstance(inputs, tf.Tensor):
inputs = tf.convert_to_tensor(inputs, dtype=tf.float32)
# dim_list = inputs.get_shape().as_list()
# flatten_shape = dim_list[1] if len(dim_list) <= 2 else reduce(lambda x, y: x * y, dim_list[1:])
# reshaped = tf.reshape(inputs, [dim_list[0], flatten_shape])
if len(inputs.shape) > 2:
inputs = tf.contrib.layers.flatten(inputs)
flatten_shape = inputs.shape[1]
weights = tf.get_variable('weights', shape=[flatten_shape, units], initializer=w_i)
dense = tf.matmul(inputs, weights)
if bias_shape is not None:
assert bias_shape[0] == units
biases = tf.get_variable('biases', shape=bias_shape, initializer=b_i)
return activation(dense + biases) if activation is not None else dense + biases
return activation(dense) if activation is not None else dense
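A hypothetical usage of the dense() helper above; the input shape, unit counts, scope names, and initializers are illustrative assumptions. Each call needs its own variable scope because the helper creates variables named 'weights' and 'biases'.
import tensorflow as tf

features = tf.placeholder(tf.float32, [None, 7, 7, 64])  # will be flattened by the helper
with tf.variable_scope("fc1"):
    h = dense(features, units=256, bias_shape=[256],
              w_i=tf.contrib.layers.xavier_initializer(), b_i=tf.zeros_initializer())
with tf.variable_scope("fc2"):
    logits = dense(h, units=10, bias_shape=[10],
                   w_i=tf.contrib.layers.xavier_initializer(), b_i=tf.zeros_initializer(),
                   activation=None)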
def ae(x):
if nonlinearity_name == 'relu':
f = tf.nn.relu
elif nonlinearity_name == 'elu':
f = tf.nn.elu
elif nonlinearity_name == 'gelu':
# def gelu(x):
# return tf.mul(x, tf.erfc(-x / tf.sqrt(2.)) / 2.)
# f = gelu
def gelu_fast(_x):
return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (_x + 0.044715 * tf.pow(_x, 3))))
f = gelu_fast
elif nonlinearity_name == 'silu':
def silu(_x):
return _x * tf.sigmoid(_x)
f = silu
# elif nonlinearity_name == 'soi':
# def soi_map(x):
# u = tf.random_uniform(tf.shape(x))
# mask = tf.to_float(tf.less(u, (1 + tf.erf(x / tf.sqrt(2.))) / 2.))
# return tf.cond(is_training, lambda: tf.mul(mask, x),
# lambda: tf.mul(x, tf.erfc(-x / tf.sqrt(2.)) / 2.))
# f = soi_map
else:
raise NameError("Need 'relu', 'elu', 'gelu', or 'silu' for nonlinearity_name")
h1 = f(tf.matmul(x, W['1']) + b['1'])
h2 = f(tf.matmul(h1, W['2']) + b['2'])
h3 = f(tf.matmul(h2, W['3']) + b['3'])
h4 = f(tf.matmul(h3, W['4']) + b['4'])
h5 = f(tf.matmul(h4, W['5']) + b['5'])
h6 = f(tf.matmul(h5, W['6']) + b['6'])
h7 = f(tf.matmul(h6, W['7']) + b['7'])
return tf.matmul(h7, W['8']) + b['8']
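As a quick sanity check (illustrative, not from the original file), the tanh-based gelu_fast above closely tracks the exact GELU, x * Phi(x), computed with tf.erf:
import numpy as np
import tensorflow as tf

x = tf.constant(np.linspace(-3.0, 3.0, 7), dtype=tf.float32)
gelu_exact = 0.5 * x * (1.0 + tf.erf(x / tf.sqrt(2.0)))
gelu_approx = 0.5 * x * (1.0 + tf.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))
with tf.Session() as sess:
    exact, approx = sess.run([gelu_exact, gelu_approx])
    print(np.max(np.abs(exact - approx)))  # small, well below 1e-2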
def baseline_forward(self, X, size, n_class):
shape = X.get_shape()
_X = tf.transpose(X, [1, 0, 2]) # batch_size x sentence_length x word_length -> sentence_length x batch_size x word_length
_X = tf.reshape(_X, [-1, int(shape[2])]) # (batch_size x sentence_length) x word_length
seq = tf.split(0, int(shape[1]), _X) # sentence_length x (batch_size x word_length)
with tf.name_scope("LSTM"):
lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=1.0)
outputs, states = rnn.rnn(lstm_cell, seq, dtype=tf.float32)
with tf.name_scope("LSTM-Classifier"):
W = tf.Variable(tf.random_normal([size, n_class]), name="W")
b = tf.Variable(tf.random_normal([n_class]), name="b")
output = tf.matmul(outputs[-1], W) + b
return output
def makeDNN(hidden_layer):
# input from X
prevLayer = X
# make layers
for i in range(hidden_layer):
if i==0:
newWeight = tf.get_variable("W0%d" % i, shape=[features, wide], initializer=tf.contrib.layers.xavier_initializer())
else:
newWeight = tf.get_variable("W0%d" % i, shape=[wide, wide], initializer=tf.contrib.layers.xavier_initializer())
newBias = tf.Variable(tf.random_normal([wide]))
newLayer = tf.nn.relu(tf.matmul(prevLayer, newWeight) + newBias)
newDropLayer = tf.nn.dropout(newLayer, dropout_rate)
prevLayer = newDropLayer
# make output layers
Wo = tf.get_variable("Wo", shape=[wide, labels], initializer=tf.contrib.layers.xavier_initializer())
bo = tf.Variable(tf.random_normal([labels]))
return tf.matmul(prevLayer, Wo) + bo
# tf Graph Input
def get_loss(pred, label, end_points, reg_weight=0.001):
""" pred: B*NUM_CLASSES,
label: B, """
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)
classify_loss = tf.reduce_mean(loss)
tf.summary.scalar('classify loss', classify_loss)
# Enforce the transformation as orthogonal matrix
transform = end_points['transform'] # BxKxK
K = transform.get_shape()[1].value
mat_diff = tf.matmul(transform, tf.transpose(transform, perm=[0,2,1]))
mat_diff -= tf.constant(np.eye(K), dtype=tf.float32)
mat_diff_loss = tf.nn.l2_loss(mat_diff)
tf.summary.scalar('mat loss', mat_diff_loss)
return classify_loss + mat_diff_loss * reg_weight
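An illustrative NumPy check (not part of the original code) of what the extra term penalizes: for an orthogonal transform T, T * T^T - I is zero, so the regularizer vanishes; for a generic matrix it is positive.
import numpy as np

K = 3
def transform_penalty(T):                         # T has shape [B, K, K]
    d = np.matmul(T, np.transpose(T, (0, 2, 1))) - np.eye(K)
    return 0.5 * np.sum(d ** 2)                   # same value as tf.nn.l2_loss
print(transform_penalty(np.eye(K)[None, :, :]))   # 0.0 for an orthogonal transform
print(transform_penalty(np.random.randn(1, K, K)))  # > 0 otherwise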
def get_loss(pred, label, end_points, reg_weight=0.001):
""" pred: BxNxC,
label: BxN, """
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)
classify_loss = tf.reduce_mean(loss)
tf.scalar_summary('classify loss', classify_loss)
# Enforce the transformation as orthogonal matrix
transform = end_points['transform'] # BxKxK
K = transform.get_shape()[1].value
mat_diff = tf.matmul(transform, tf.transpose(transform, perm=[0,2,1]))
mat_diff -= tf.constant(np.eye(K), dtype=tf.float32)
mat_diff_loss = tf.nn.l2_loss(mat_diff)
tf.scalar_summary('mat_loss', mat_diff_loss)
return classify_loss + mat_diff_loss * reg_weight
def get_loss(l_pred, seg_pred, label, seg, weight, end_points):
per_instance_label_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=l_pred, labels=label)
label_loss = tf.reduce_mean(per_instance_label_loss)
# size of seg_pred is batch_size x point_num x part_cat_num
# size of seg is batch_size x point_num
per_instance_seg_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=seg_pred, labels=seg), axis=1)
seg_loss = tf.reduce_mean(per_instance_seg_loss)
per_instance_seg_pred_res = tf.argmax(seg_pred, 2)
# Enforce the transformation as orthogonal matrix
transform = end_points['transform'] # BxKxK
K = transform.get_shape()[1].value
mat_diff = tf.matmul(transform, tf.transpose(transform, perm=[0,2,1])) - tf.constant(np.eye(K), dtype=tf.float32)
mat_diff_loss = tf.nn.l2_loss(mat_diff)
total_loss = weight * seg_loss + (1 - weight) * label_loss + mat_diff_loss * 1e-3
return total_loss, label_loss, per_instance_label_loss, seg_loss, per_instance_seg_loss, per_instance_seg_pred_res
def __init__(self, embedding):
self.sess = tf.Session()
self.inputs = tf.placeholder(tf.float32,
[None, embedding.shape[1]],
name='inputs')
self.test_vec = tf.placeholder(tf.float32, [1, embedding.shape[1]],
name='test_vec')
self.cos_distance = tf.matmul(self.inputs, tf.transpose(self.test_vec))
#-----------------------------------------------------------------------
# Compute normalized embedding matrix
#-----------------------------------------------------------------------
row_sum = tf.reduce_sum(tf.square(self.inputs), axis=1,
keep_dims=True)
norm = tf.sqrt(row_sum)
self.normalized = self.inputs / norm
self.embedding = self.sess.run(self.normalized,
feed_dict={self.inputs: embedding})
#---------------------------------------------------------------------------
def discriminate(self, image, Y):
print("Initializing the discriminator")
print("Y shape", Y.get_shape())
yb = tf.reshape(Y, tf.stack([self.batch_size, 1, 1, self.dim_y]))
print("image shape", image.get_shape())
print("yb shape", yb.get_shape())
X = tf.concat([image, yb * tf.ones([self.batch_size, 24, 24, self.dim_y])],3)
print("X shape", X.get_shape())
h1 = lrelu( tf.nn.conv2d( X, self.discrim_W1, strides=[1,2,2,1], padding='SAME' ))
print("h1 shape", h1.get_shape())
h1 = tf.concat([h1, yb * tf.ones([self.batch_size, 12, 12, self.dim_y])],3)
print("h1 shape", h1.get_shape())
h2 = lrelu(batchnormalize( tf.nn.conv2d( h1, self.discrim_W2, strides=[1,2,2,1], padding='SAME')) )
print("h2 shape", h2.get_shape())
h2 = tf.reshape(h2, [self.batch_size, -1])
h2 = tf.concat([h2, Y], 1)
discri=tf.matmul(h2, self.discrim_W3 )
print("discri shape", discri.get_shape())
h3 = lrelu(batchnormalize(discri))
return h3
def samples_generator(self, batch_size):
Z = tf.placeholder(tf.float32, [batch_size, self.dim_z])
Y = tf.placeholder(tf.float32, [batch_size, self.dim_y])
yb = tf.reshape(Y, [batch_size, 1, 1, self.dim_y])
Z_ = tf.concat([Z,Y], 1)
h1 = tf.nn.relu(batchnormalize(tf.matmul(Z_, self.gen_W1)))
h1 = tf.concat([h1, Y], 1)
h2 = tf.nn.relu(batchnormalize(tf.matmul(h1, self.gen_W2)))
h2 = tf.reshape(h2, [batch_size,6,6,self.dim_W2])
h2 = tf.concat([h2, yb*tf.ones([batch_size, 6,6, self.dim_y])], 3)
output_shape_l3 = [batch_size,12,12,self.dim_W3]
h3 = tf.nn.conv2d_transpose(h2, self.gen_W3, output_shape=output_shape_l3, strides=[1,2,2,1])
h3 = tf.nn.relu( batchnormalize(h3) )
h3 = tf.concat([h3, yb*tf.ones([batch_size, 12,12,self.dim_y])], 3)
output_shape_l4 = [batch_size,24,24,self.dim_channel]
h4 = tf.nn.conv2d_transpose(h3, self.gen_W4, output_shape=output_shape_l4, strides=[1,2,2,1])
x = tf.nn.sigmoid(h4)
return Z, Y, x
def transfer_color(content, style):
import scipy.linalg as sl
# Mean and covariance of content
content_mean = np.mean(content, axis = (0, 1))
content_diff = content - content_mean
content_diff = np.reshape(content_diff, (-1, content_diff.shape[2]))
content_covariance = np.matmul(content_diff.T, content_diff) / (content_diff.shape[0])
# Mean and covariance of style
style_mean = np.mean(style, axis = (0, 1))
style_diff = style - style_mean
style_diff = np.reshape(style_diff, (-1, style_diff.shape[2]))
style_covariance = np.matmul(style_diff.T, style_diff) / (style_diff.shape[0])
# Calculate A and b
A = np.matmul(sl.sqrtm(content_covariance), sl.inv(sl.sqrtm(style_covariance)))
b = content_mean - np.matmul(A, style_mean)
# Construct new style
new_style = np.reshape(style, (-1, style.shape[2])).T
new_style = np.matmul(A, new_style).T
new_style = np.reshape(new_style, style.shape)
new_style = new_style + b
return new_style
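A hypothetical call to transfer_color (requires scipy); the random arrays stand in for content and style images with values in [0, 1] and shape (H, W, 3).
import numpy as np

content_img = np.random.rand(64, 64, 3)
style_img = np.random.rand(64, 64, 3)
recolored_style = transfer_color(content_img, style_img)
print(recolored_style.shape)  # (64, 64, 3): the style image shifted toward the content color statistics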
Source: tf_utils.py, project: convolutional-pose-machines-tensorflow, author: timctho
def rotate_points(orig_points, angle, w, h):
"""Return rotated points
Args:
orig_points: 'Tensor' with shape [N,2], each entry is point (x,y)
angle: rotation angle in radians
w, h: width and height of the image, used to scale normalized coordinates
Returns:
'Tensor' with shape [N,2], with rotated points
"""
# rotation
rotate_mat = tf.stack([[tf.cos(angle) / w, tf.sin(angle) / h],
[-tf.sin(angle) / w, tf.cos(angle) / h]])
# shift coord
orig_points = tf.subtract(orig_points, 0.5)
orig_points = tf.stack([orig_points[:, 0] * w,
orig_points[:, 1] * h], axis=1)
print(orig_points)
rotated_points = tf.matmul(orig_points, rotate_mat) + 0.5
return rotated_points
def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False):
shape = input_.get_shape().as_list()
#mat_shape=tf.stack([tf.shape(input_)[1],output_size])
mat_shape=[shape[1],output_size]
with tf.variable_scope(scope or "Linear"):
#matrix = tf.get_variable("Matrix", [shape[1], output_size], tf.float32,
matrix = tf.get_variable("Matrix", mat_shape, tf.float32,
tf.random_normal_initializer(stddev=stddev))
bias = tf.get_variable("bias", [output_size],
initializer=tf.constant_initializer(bias_start))
if with_w:
return tf.matmul(input_, matrix) + bias, matrix, bias
else:
return tf.matmul(input_, matrix) + bias
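A hypothetical usage of linear(); the input size, output size, and scope names are made up for illustration.
import tensorflow as tf

z = tf.placeholder(tf.float32, [None, 100])
h0 = linear(z, 256, scope="g_h0")                        # affine projection only
h1, W1, b1 = linear(z, 256, scope="g_h1", with_w=True)   # also return the created variables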
#minibatch method that improves on openai
#because it doesn't fix batchsize:
#TODO: recheck when not sleepy
def _fc(self, x, fan_in, fan_out, layer_name, activation=None, L2=1, use_bias=True,
wmin=None,wmax=None,analysis=False):
show_weight = self.flags.visualize and 'weight' in self.flags.visualize
if wmin is not None or wmax is not None:
use_bias = False
assert wmin is not None and wmax is not None
with tf.variable_scope(layer_name.split('/')[-1]):
w,b = self._get_fc_weights(fan_in, fan_out, layer_name)
if wmin is not None:
wr = wmax-wmin
w = self._activate(w,'sigmoid')*wr+wmin
#w = tf.clip_by_value(w,wmin,wmax)
net = tf.matmul(x,w)
if use_bias:
net = tf.nn.bias_add(net, b)
net = self._activate(net, activation)
if show_weight:
tf.summary.histogram(name='W', values=w, collections=[tf.GraphKeys.WEIGHTS])
if use_bias:
tf.summary.histogram(name='bias', values=b, collections=[tf.GraphKeys.WEIGHTS])
if analysis:
net1 = tf.expand_dims(x,2)*tf.expand_dims(w,0)
#net1 = tf.reshape(net1,[tf.shape(x)[0],fan_in*fan_out])
return net,net1
return net
def __init__(self, sigma=0.1, beta_sampling=True, **kwargs):
"""
sigma:
Standard deviation of input data, for use in sampling.
beta_sampling:
Use beta distribution for sampling, instead of Gaussian.
"""
RBM.__init__(self, **kwargs)
if not kwargs.get('fromfile'):
self.sigma = sigma
self.beta_sampling = beta_sampling
if self.sigma is None: raise AssertionError('Need to supply sigma param.')
self.hidden = tf.placeholder(self.dtype, name='hidden',
shape=[None, self.n_hidden])
self.mean_v = tf.sigmoid(tf.matmul(self.hidden, self.params['W'],
transpose_b=True) +
self.params['bvis'])
def recode_cost(self, inputs, variation, eps=1e-5, **kwargs):
"""
Cost for given input batch of samples, under current params.
"""
h = self.get_h_inputs(inputs)
z_mu = tf.matmul(h, self.params['Mhz']) + self.params['bMhz']
z_sig = tf.matmul(h, self.params['Shz']) + self.params['bShz']
# KL divergence between the latent distribution induced by the encoder and the standard normal prior
lat_loss = -tf.reduce_sum(1 + z_sig - z_mu**2 - tf.exp(z_sig), 1)
z = z_mu + tf.sqrt(tf.exp(z_sig)) * variation
h = self.get_h_latents(z)
x_mu = self.decoding(tf.matmul(h, self.params['Mhx']) + self.params['bMhx'])
x_sig = self.decoding(tf.matmul(h, self.params['Shx']) + self.params['bShx'])
# x_sig = tf.clip_by_value(x_mu * (1 - x_mu), .05, 1)
# decoding likelihood term
like_loss = tf.reduce_sum(tf.log(x_sig + eps) +
(inputs - x_mu)**2 / x_sig, 1)
# # Mean cross entropy between input and encode-decoded input.
# like_loss = 2 * tf.reduce_sum(functions.cross_entropy(inputs, x_mu), 1)
return .5 * tf.reduce_mean(like_loss + lat_loss)
def output_module(self):
"""
1.use attention mechanism between query and hidden states, to get weighted sum of hidden state. 2.non-linearity of query and hidden state to get label.
input: query_embedding:[batch_size,embed_size], hidden state:[batch_size,block_size,hidden_size] of memory
:return: y, the predicted label. shape:[batch_size,vocab_size]
"""
# 1.use attention mechanism between query and hidden states, to get weighted sum of hidden state.
# 1.1 get probability distribution (of similarity)
p=tf.nn.softmax(tf.multiply(tf.expand_dims(self.query_embedding,axis=1),self.hidden_state)) #shape:[batch_size,block_size,hidden_size]<---query_embedding_expand:[batch_size,1,hidden_size]; hidden_state:[batch_size,block_size,hidden_size]
# 1.2 get weighted sum of hidden state
u=tf.reduce_sum(tf.multiply(p,self.hidden_state),axis=1) #shape:[batch_size,hidden_size]<----------([batch_size,block_size,hidden_size],[batch_size,block_size,hidden_size])
# 2.non-linearity of query and hidden state to get label
H_u_matmul=tf.matmul(u,self.H)+self.h_u_bias #shape:[batch_size,hidden_size]<----([batch_size,hidden_size],[hidden_size,hidden_size])
activation=self.activation(self.query_embedding + H_u_matmul,scope="query_add_hidden") #shape:[batch_size,hidden_size]
activation = tf.nn.dropout(activation,keep_prob=self.dropout_keep_prob) #shape:[batch_size,hidden_size]
y=tf.matmul(activation,self.R)+self.y_bias #shape:[batch_size,vocab_size]<-----([batch_size,hidden_size],[hidden_size,vocab_size])
return y #shape:[batch_size,vocab_size]
Source: a2_transformer_classification.py, project: text_classification, author: brightmart
def inference(self):
""" building blocks:
encoder: 6 layers. Each layer has two sub-layers: the first is a multi-head self-attention mechanism; the second is a position-wise fully connected feed-forward network.
For each sub-layer, use LayerNorm(x + Sublayer(x)). All dimensions = 512.
decoder: 6 layers. Each layer has three sub-layers; the second performs multi-head attention over the output of the encoder stack.
For each sub-layer, use LayerNorm(x + Sublayer(x)).
"""
# 1.embedding for encoder input & decoder input
# 1.1 position embedding for encoder input
input_x_embeded = tf.nn.embedding_lookup(self.Embedding,self.input_x) #[None,sequence_length, embed_size]
input_x_embeded=tf.multiply(input_x_embeded,tf.sqrt(tf.cast(self.d_model,dtype=tf.float32)))
input_mask=tf.get_variable("input_mask",[self.sequence_length,1],initializer=self.initializer)
input_x_embeded=tf.add(input_x_embeded,input_mask) #[None,sequence_length,embed_size].position embedding.
# 2. encoder
encoder_class=Encoder(self.d_model,self.d_k,self.d_v,self.sequence_length,self.h,self.batch_size,self.num_layer,input_x_embeded,input_x_embeded,dropout_keep_prob=self.dropout_keep_prob,use_residual_conn=self.use_residual_conn)
Q_encoded,K_encoded = encoder_class.encoder_fn() #K_v_encoder
Q_encoded=tf.reshape(Q_encoded,shape=(self.batch_size,-1)) #[batch_size,sequence_length*d_model]
with tf.variable_scope("output"):
logits = tf.matmul(Q_encoded, self.W_projection) + self.b_projection #logits shape:[batch_size, self.num_classes]
print("logits:",logits)
return logits
def inference(self):
"""main computation graph here: 1. embeddding layer, 2.Bi-LSTM layer, 3.max pooling, 4.FC layer 5.softmax """
#1.get emebedding of words in the sentence
self.embedded_words = tf.nn.embedding_lookup(self.Embedding,self.input_x) #shape:[None,sentence_length,embed_size]
#2. Bi-lstm layer
output_conv=self.conv_layer_with_recurrent_structure() #shape:[None,sentence_length,embed_size*3]
#2.1 apply nolinearity
#b = tf.get_variable("b", [self.embed_size*3])
#h = tf.nn.relu(tf.nn.bias_add(output_conv, b), "relu")
#3. max pooling
output_pooling=tf.reduce_max(output_conv,axis=1) #shape:[None,embed_size*3]
#4. logits(use linear layer)
with tf.name_scope("dropout"):
h_drop=tf.nn.dropout(output_pooling,keep_prob=self.dropout_keep_prob) #[None,embed_size*3]
with tf.name_scope("output"): #inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network.
logits = tf.matmul(h_drop, self.W_projection) + self.b_projection #shape:[batch_size,num_classes]<-----h_drop:[None,embed_size*3]; W_projection:[embed_size*3, self.num_classes]
return logits
def extract_argmax_and_embed(embedding, output_projection=None):
"""
Get a loop_function that extracts the previous symbol and embeds it. Used by decoder.
:param embedding: embedding tensor for symbol
:param output_projection: None or a pair (W, B). If provided, each fed previous output will
first be multiplied by W and have B added.
:return: A loop function
"""
def loop_function(prev, _):
if output_projection is not None:
prev = tf.matmul(prev, output_projection[0]) + output_projection[1]
prev_symbol = tf.argmax(prev, 1)  # index of the most probable previous output symbol
emb_prev = tf.gather(embedding, prev_symbol)  # look up the embedding for that index
return emb_prev
return loop_function
# RNN decoder loop function:
# during training the ground-truth previous symbol is fed in; at test time, the output at step t is fed back as the input at step t+1.
Source: a1_seq2seq_attention_model.py, project: text_classification, author: brightmart
def gru_cell_decoder(self, Xt, h_t_minus_1,context_vector):
"""
single step of gru for word level
:param Xt: Xt:[batch_size,embed_size]
:param h_t_minus_1:[batch_size,embed_size]
:param context_vector: [batch_size,embed_size]. This represents the result of attention (the weighted sum of the inputs for the current decoding step).
:return:
"""
# 1.update gate: decides how much past information is kept and how much new information is added.
z_t = tf.nn.sigmoid(tf.matmul(Xt, self.W_z_decoder) + tf.matmul(h_t_minus_1,self.U_z_decoder) +tf.matmul(context_vector,self.C_z_decoder)+self.b_z_decoder) # z_t:[batch_size,self.hidden_size]
# 2.reset gate: controls how much the past state contributes to the candidate state.
r_t = tf.nn.sigmoid(tf.matmul(Xt, self.W_r_decoder) + tf.matmul(h_t_minus_1,self.U_r_decoder) +tf.matmul(context_vector,self.C_r_decoder)+self.b_r_decoder) # r_t:[batch_size,self.hidden_size]
# candidate state h_t~
h_t_candiate = tf.nn.tanh(tf.matmul(Xt, self.W_h_decoder) +r_t * (tf.matmul(h_t_minus_1, self.U_h_decoder)) +tf.matmul(context_vector, self.C_h_decoder)+ self.b_h_decoder) # h_t_candiate:[batch_size,self.hidden_size]
# new state: a linear combination of the previous hidden state and the candidate state h_t~
h_t = (1 - z_t) * h_t_minus_1 + z_t * h_t_candiate # h_t:[batch_size*num_sentences,hidden_size]
return h_t,h_t
# forward gru for first level: word levels
Source: p72_TextCNN_with_RCNN_model.py, project: text_classification, author: brightmart
def inference2(self):
"""main computation graph here: 1. embeddding layer, 2.Bi-LSTM layer, 3.max pooling, 4.FC layer 5.softmax """
#1.get emebedding of words in the sentence
self.embedded_words = tf.nn.embedding_lookup(self.Embedding,self.input_x) #shape:[None,sentence_length,embed_size]
#2. Bi-lstm layer
output_conv=self.conv_layer_with_recurrent_structure() #shape:[None,sentence_length,embed_size*3]
#3. max pooling
#print("output_conv:",output_conv) #(3, 5, 8, 100)
output_pooling=tf.reduce_max(output_conv,axis=1) #shape:[None,embed_size*3]
#print("output_pooling:",output_pooling) #(3, 8, 100)
#4. logits(use linear layer)
with tf.name_scope("dropout_rcnn"):
h_drop=tf.nn.dropout(output_pooling,keep_prob=self.dropout_keep_prob) #[None,embed_size*3]
#with tf.name_scope("output"): #inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations of the input network.
logits = tf.matmul(h_drop, self.W_projection_rcnn) + self.b_projection_rcnn # [batch_size,num_classes]
return logits
Source: p1_HierarchicalAttention_model.py, project: text_classification, author: brightmart
def gru_single_step_word_level(self, Xt, h_t_minus_1):
"""
single step of gru for word level
:param Xt: Xt:[batch_size*num_sentences,embed_size]
:param h_t_minus_1:[batch_size*num_sentences,embed_size]
:return:
"""
# update gate: decides how much past information is kept and how much new information is added.
z_t = tf.nn.sigmoid(tf.matmul(Xt, self.W_z) + tf.matmul(h_t_minus_1,
self.U_z) + self.b_z) # z_t:[batch_size*num_sentences,self.hidden_size]
# reset gate: controls how much the past state contributes to the candidate state.
r_t = tf.nn.sigmoid(tf.matmul(Xt, self.W_r) + tf.matmul(h_t_minus_1,
self.U_r) + self.b_r) # r_t:[batch_size*num_sentences,self.hidden_size]
# candidate state h_t~
h_t_candiate = tf.nn.tanh(tf.matmul(Xt, self.W_h) +r_t * (tf.matmul(h_t_minus_1, self.U_h)) + self.b_h) # h_t_candiate:[batch_size*num_sentences,self.hidden_size]
# new state: a linear combination of the previous hidden state and the candidate state h_t~
h_t = (1 - z_t) * h_t_minus_1 + z_t * h_t_candiate # h_t:[batch_size*num_sentences,hidden_size]
return h_t
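For reference, a standalone NumPy sketch of one GRU step using the same gating equations; the sizes and random weights are stand-ins for the model's W_*/U_*/b_* parameters.
import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

batch, hidden = 2, 4
Xt = np.random.randn(batch, hidden)
h_prev = np.random.randn(batch, hidden)
W_z, U_z, b_z = np.random.randn(hidden, hidden), np.random.randn(hidden, hidden), np.zeros(hidden)
W_r, U_r, b_r = np.random.randn(hidden, hidden), np.random.randn(hidden, hidden), np.zeros(hidden)
W_h, U_h, b_h = np.random.randn(hidden, hidden), np.random.randn(hidden, hidden), np.zeros(hidden)
z_t = sigmoid(np.dot(Xt, W_z) + np.dot(h_prev, U_z) + b_z)           # update gate
r_t = sigmoid(np.dot(Xt, W_r) + np.dot(h_prev, U_r) + b_r)           # reset gate
h_cand = np.tanh(np.dot(Xt, W_h) + r_t * np.dot(h_prev, U_h) + b_h)  # candidate state
h_t = (1 - z_t) * h_prev + z_t * h_cand                              # new hidden state
print(h_t.shape)  # (2, 4)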
Source: p1_HierarchicalAttention_model.py, project: text_classification, author: brightmart
def gru_single_step_sentence_level(self, Xt,
h_t_minus_1): # Xt:[batch_size, hidden_size*2]; h_t:[batch_size, hidden_size*2]
"""
single step of gru for sentence level
:param Xt:[batch_size, hidden_size*2]
:param h_t_minus_1:[batch_size, hidden_size*2]
:return:h_t:[batch_size,hidden_size]
"""
# update gate: decides how much past information is kept and how much new information is added.
z_t = tf.nn.sigmoid(tf.matmul(Xt, self.W_z_sentence) + tf.matmul(h_t_minus_1,
self.U_z_sentence) + self.b_z_sentence) # z_t:[batch_size,self.hidden_size]
# reset gate: controls how much the past state contributes to the candidate state.
r_t = tf.nn.sigmoid(tf.matmul(Xt, self.W_r_sentence) + tf.matmul(h_t_minus_1,
self.U_r_sentence) + self.b_r_sentence) # r_t:[batch_size,self.hidden_size]
# candidate state h_t~
h_t_candiate = tf.nn.tanh(tf.matmul(Xt, self.W_h_sentence) + r_t * (
tf.matmul(h_t_minus_1, self.U_h_sentence)) + self.b_h_sentence) # h_t_candiate:[batch_size,self.hidden_size]
# new state: a linear combination of the previous hidden state and the candidate state h_t~
h_t = (1 - z_t) * h_t_minus_1 + z_t * h_t_candiate
return h_t
# forward gru for first level: word levels
def extract_argmax_and_embed(embedding, output_projection=None):
"""
Get a loop_function that extracts the previous symbol and embeds it. Used by decoder.
:param embedding: embedding tensor for symbol
:param output_projection: None or a pair (W, B). If provided, each fed previous output will
first be multiplied by W and have B added.
:return: A loop function
"""
def loop_function(prev, _):
if output_projection is not None:
prev = tf.matmul(prev, output_projection[0]) + output_projection[1]
prev_symbol = tf.argmax(prev, 1)  # index of the most probable previous output symbol
emb_prev = tf.gather(embedding, prev_symbol)  # look up the embedding for that index
return emb_prev
return loop_function
# RNN decoder loop function:
# during training the ground-truth previous symbol is fed in; at test time, the output at step t is fed back as the input at step t+1.
Source: p1_HierarchicalAttention_model_transformer.py, project: text_classification, author: brightmart
def gru_single_step_word_level(self, Xt, h_t_minus_1):
"""
single step of gru for word level
:param Xt: Xt:[batch_size*num_sentences,embed_size]
:param h_t_minus_1:[batch_size*num_sentences,embed_size]
:return:
"""
# update gate: decides how much past information is kept and how much new information is added.
z_t = tf.nn.sigmoid(tf.matmul(Xt, self.W_z) + tf.matmul(h_t_minus_1,
self.U_z) + self.b_z) # z_t:[batch_size*num_sentences,self.hidden_size]
# reset gate: controls how much the past state contributes to the candidate state.
r_t = tf.nn.sigmoid(tf.matmul(Xt, self.W_r) + tf.matmul(h_t_minus_1,
self.U_r) + self.b_r) # r_t:[batch_size*num_sentences,self.hidden_size]
# candidate state h_t~
h_t_candiate = tf.nn.tanh(tf.matmul(Xt, self.W_h) +r_t * (tf.matmul(h_t_minus_1, self.U_h)) + self.b_h) # h_t_candiate:[batch_size*num_sentences,self.hidden_size]
# new state: a linear combination of the previous hidden state and the candidate state h_t~
h_t = (1 - z_t) * h_t_minus_1 + z_t * h_t_candiate # h_t:[batch_size*num_sentences,hidden_size]
return h_t
Source: p1_HierarchicalAttention_model_transformer.py, project: text_classification, author: brightmart
def gru_single_step_sentence_level(self, Xt,
h_t_minus_1): # Xt:[batch_size, hidden_size*2]; h_t:[batch_size, hidden_size*2]
"""
single step of gru for sentence level
:param Xt:[batch_size, hidden_size*2]
:param h_t_minus_1:[batch_size, hidden_size*2]
:return:h_t:[batch_size,hidden_size]
"""
# update gate: decides how much past information is kept and how much new information is added.
z_t = tf.nn.sigmoid(tf.matmul(Xt, self.W_z_sentence) + tf.matmul(h_t_minus_1,
self.U_z_sentence) + self.b_z_sentence) # z_t:[batch_size,self.hidden_size]
# reset gate: controls how much the past state contributes to the candidate state.
r_t = tf.nn.sigmoid(tf.matmul(Xt, self.W_r_sentence) + tf.matmul(h_t_minus_1,
self.U_r_sentence) + self.b_r_sentence) # r_t:[batch_size,self.hidden_size]
# candidate state h_t~
h_t_candiate = tf.nn.tanh(tf.matmul(Xt, self.W_h_sentence) + r_t * (
tf.matmul(h_t_minus_1, self.U_h_sentence)) + self.b_h_sentence) # h_t_candiate:[batch_size,self.hidden_size]
# new state: a linear combination of the previous hidden state and the candidate state h_t~
h_t = (1 - z_t) * h_t_minus_1 + z_t * h_t_candiate
return h_t
# forward gru for first level: word levels
Source: p9_twoCNNTextRelation_model.py, project: text_classification, author: brightmart
def inference(self):
"""main computation graph here: 1. embeddding layers, 2.convolutional layer, 3.max-pooling, 4.softmax layer."""
# 1.=====>get emebedding of words in the sentence
self.embedded_words1 = tf.nn.embedding_lookup(self.Embedding,self.input_x)#[None,sentence_length,embed_size]
self.sentence_embeddings_expanded1=tf.expand_dims(self.embedded_words1,-1) #[None,sentence_length,embed_size,1]. expand dimension so it meets the input requirement of 2d-conv
self.embedded_words2 = tf.nn.embedding_lookup(self.Embedding,self.input_x2)#[None,sentence_length,embed_size]
self.sentence_embeddings_expanded2=tf.expand_dims(self.embedded_words2,-1) #[None,sentence_length,embed_size,1]. expand dimension so it meets the input requirement of 2d-conv
#2.1 get features of sentence1
h1=self.conv_relu_pool_dropout(self.sentence_embeddings_expanded1,name_scope_prefix="s1") #[None,num_filters_total]
#2.2 get features of sentence2
h2 =self.conv_relu_pool_dropout(self.sentence_embeddings_expanded2,name_scope_prefix="s2") # [None,num_filters_total]
#3. concat features
h=tf.concat([h1,h2],axis=1) #[None,num_filters_total*2]
#4. logits(use linear layer)and predictions(argmax)
with tf.name_scope("output"):
logits = tf.matmul(h,self.W_projection) + self.b_projection #shape:[None, self.num_classes]==tf.matmul([None,self.num_filters_total*2],[self.num_filters_total*2,self.num_classes])
return logits
def create_critic_net(self, num_states=4, num_actions=1):
N_HIDDEN_1 = 400
N_HIDDEN_2 = 300
critic_state_in = tf.placeholder("float",[None,num_states])
critic_action_in = tf.placeholder("float",[None,num_actions])
W1_c = tf.Variable(tf.random_uniform([num_states,N_HIDDEN_1],-1/math.sqrt(num_states),1/math.sqrt(num_states)))
B1_c = tf.Variable(tf.random_uniform([N_HIDDEN_1],-1/math.sqrt(num_states),1/math.sqrt(num_states)))
W2_c = tf.Variable(tf.random_uniform([N_HIDDEN_1,N_HIDDEN_2],-1/math.sqrt(N_HIDDEN_1+num_actions),1/math.sqrt(N_HIDDEN_1+num_actions)))
W2_action_c = tf.Variable(tf.random_uniform([num_actions,N_HIDDEN_2],-1/math.sqrt(N_HIDDEN_1+num_actions),1/math.sqrt(N_HIDDEN_1+num_actions)))
B2_c= tf.Variable(tf.random_uniform([N_HIDDEN_2],-1/math.sqrt(N_HIDDEN_1+num_actions),1/math.sqrt(N_HIDDEN_1+num_actions)))
W3_c= tf.Variable(tf.random_uniform([N_HIDDEN_2,1],-0.003,0.003))
B3_c= tf.Variable(tf.random_uniform([1],-0.003,0.003))
H1_c=tf.nn.softplus(tf.matmul(critic_state_in,W1_c)+B1_c)
H2_c=tf.nn.tanh(tf.matmul(H1_c,W2_c)+tf.matmul(critic_action_in,W2_action_c)+B2_c)
critic_q_model=tf.matmul(H2_c,W3_c)+B3_c
return W1_c, B1_c, W2_c, W2_action_c, B2_c, W3_c, B3_c, critic_q_model, critic_state_in, critic_action_in