def predict(self, tree):
    if tr.isleaf(tree):
        # output = word vector; fall back to the UNK vector for unknown words
        try:
            tree.vector = self.L[:, self.word_map[tree[0]]]
        except KeyError:
            tree.vector = self.L[:, self.word_map[tr.UNK]]
    else:
        # calculate output of child nodes
        self.predict(tree[0])
        self.predict(tree[1])
        # compute output from the concatenated child vectors
        lr = np.hstack([tree[0].vector, tree[1].vector])
        tree.vector = np.tanh(
            np.tensordot(self.V, np.outer(lr, lr), axes=([1, 2], [0, 1])) +
            np.dot(self.W, lr) + self.b)
    # softmax over the label scores
    import util
    tree.output = util.softmax(np.dot(self.Ws, tree.vector) + self.bs)
    label = np.argmax(tree.output)
    tree.set_label(str(label))
    return tree
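`predict` (and the samplers below) call `util.softmax`, which is not part of this listing. A minimal sketch of such a helper, assuming a plain numerically stable softmax over a 1-D score vector; the body is an assumption, not the project's code:

# Hypothetical stand-in for util.softmax as used above (assumption):
# a numerically stable softmax over a 1-D array of scores.
import numpy as np

def softmax(x):
    shifted = x - np.max(x)      # subtract the max for numerical stability
    exps = np.exp(shifted)
    return exps / np.sum(exps)   # probabilities summing to 1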
Python softmax() example source code
def sample_noun(self, vector):
    r"""Sample a noun at random.

    The probability of word :math:`w` is

    .. math::

        \log p(w) \propto w^T v / \tau

    where :math:`v` is the poem vector, :math:`w` the word vector and
    :math:`\tau` the sampling temperature."""
    p = util.softmax(self.noun_vectors.dot(vector) / self.tau)
    return npr.choice(self.nouns, p=p)

def sample_adjective(self, vector):
    """Sample an adjective at random (same method as sample_noun)."""
    p = util.softmax(self.adj_vectors.dot(vector) / self.tau)
    return npr.choice(self.adjs, p=p)
generate_synthetic_data_alt.py source code (project: dict_based_learning, author: tombosc)
def proba(self, features, params):
    """Return a categorical probability distribution over the vocabulary."""
    product = np.dot(features, params)
    return softmax(product, self.T)
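`proba` delegates to a module-level `softmax(product, self.T)` defined elsewhere in the project. A small hedged illustration of what the temperature argument does, assuming the conventional exp(x / T) normalization; the helper name and values here are illustrative:

# Illustration of temperature scaling (assumes softmax(x, T) normalizes exp(x / T)).
import numpy as np

def tempered_softmax(x, T):
    z = np.exp((x - np.max(x)) / T)
    return z / z.sum()

scores = np.array([2.0, 1.0, 0.1])
print(tempered_softmax(scores, T=0.5))  # sharper: mass concentrates on the top score
print(tempered_softmax(scores, T=5.0))  # flatter: closer to uniform sampling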
def forward_prop(self, tree):
    cost = 0.0
    result = np.zeros((5, 5))  # confusion matrix over the 5 sentiment labels
    if tr.isleaf(tree):
        # output = word vector; fall back to the UNK vector for unknown words
        try:
            tree.vector = self.L[:, self.word_map[tree[0]]]
        except KeyError:
            tree.vector = self.L[:, self.word_map[tr.UNK]]
        tree.fprop = True
    else:
        # calculate output of child nodes
        lcost, lresult = self.forward_prop(tree[0])
        rcost, rresult = self.forward_prop(tree[1])
        cost += lcost + rcost
        result += lresult + rresult
        # compute output from the concatenated child vectors
        lr = np.hstack([tree[0].vector, tree[1].vector])
        tree.vector = np.tanh(
            np.tensordot(self.V, np.outer(lr, lr), axes=([1, 2], [0, 1])) +
            np.dot(self.W, lr) + self.b)
    # softmax, computed inline with max-subtraction for numerical stability
    tree.output = np.dot(self.Ws, tree.vector) + self.bs
    tree.output -= np.max(tree.output)
    tree.output = np.exp(tree.output)
    tree.output /= np.sum(tree.output)
    tree.fprop = True
    # cost: cross-entropy against the gold label, plus confusion-matrix bookkeeping
    cost -= np.log(tree.output[int(tree.label())])
    true_label = int(tree.label())
    predicted_label = np.argmax(tree.output)
    result[true_label, predicted_label] += 1
    return cost, result
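A hedged sketch of how `forward_prop` could be used for evaluation: the 5x5 `result` matrix is indexed as [true_label, predicted_label], so accuracy is its trace divided by its sum. The `model`, `trees`, and `evaluate` names are illustrative, not part of the original code:

# Illustrative evaluation loop (assumes `model` is the recursive network
# above and `trees` is a list of labeled parse trees).
import numpy as np

def evaluate(model, trees):
    total_cost = 0.0
    confusion = np.zeros((5, 5))
    for t in trees:
        cost, result = model.forward_prop(t)
        total_cost += cost
        confusion += result
    accuracy = np.trace(confusion) / np.sum(confusion)  # correct nodes / all nodes
    return total_cost, accuracy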
def back_prop(self, tree, error=None):
    # clear nodes
    tree.fprop = False
    # softmax grad
    deltas = tree.output
    deltas[int(tree.label())] -= 1.0
    self.dWs += np.outer(deltas, tree.vector)
    self.dbs += deltas
    deltas = np.dot(self.Ws.T, deltas)
    if error is not None:
        deltas += error
    deltas *= (1 - tree.vector**2)  # tanh'(x) = 1 - tanh(x)^2
    # leaf node => update word vectors
    if tr.isleaf(tree):
        try:
            index = self.word_map[tree[0]]
        except KeyError:
            index = self.word_map[tr.UNK]
        self.dL[index] += deltas
        return
    # Hidden gradients
    else:
        lr = np.hstack([tree[0].vector, tree[1].vector])
        outer = np.outer(deltas, lr)
        self.dV += (np.outer(lr, lr)[..., None] * deltas).T
        self.dW += outer
        self.db += deltas
        # Compute error for children
        deltas = np.dot(self.W.T, deltas)
        deltas += np.tensordot(self.V.transpose((0, 2, 1)) + self.V, outer.T,
                               axes=([1, 0], [0, 1]))
        self.back_prop(tree[0], deltas[:self.dim])
        self.back_prop(tree[1], deltas[self.dim:])
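A hedged sketch of applying the accumulated gradients with plain SGD, assuming `dL` is a mapping from word index to gradient vector (as the indexing in `back_prop` suggests) and that regularization and gradient resetting happen elsewhere; the learning-rate value and function name are illustrative:

# Illustrative SGD step over the gradients accumulated by back_prop
# (assumption: not the project's actual trainer).
def sgd_step(model, lr=0.01):
    model.Ws -= lr * model.dWs
    model.bs -= lr * model.dbs
    model.W -= lr * model.dW
    model.b -= lr * model.db
    model.V -= lr * model.dV
    for index, grad in model.dL.items():  # per-word gradient vectors
        model.L[:, index] -= lr * grad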
generate_synthetic_data_alt.py source code (project: dict_based_learning, author: tombosc)
def __init__(self, n_primes, n_composed, features_size, markov_order,
             temperature=1.0, min_len_definitions=2, max_len_definitions=4):
    """
    markov_order: integer >= 1 such that
        p(x_t | x_{t-1}, ..., x_1) = p(x_t | x_{t-1}, ..., x_{t-markov_order})
    temperature: temperature for the softmax
    """
    self.mo = markov_order
    self.np = n_primes
    self.nc = n_composed
    self.V = self.np + self.nc
    self.T = temperature
    self.min_len_def = min_len_definitions
    self.max_len_def = max_len_definitions
    self.features_size = features_size
    # tokens are composed of a..z letters
    alphabet = ''.join([chr(c) for c in range(97, 97 + 26)])  # 'a'..'z'
    # tokens all have the same length tok_len
    self.tok_len = int(np.log(self.V) / np.log(len(alphabet)) + 1)
    # enumerate all the tokens
    self.vocabulary = []
    for i, tok in zip(range(self.V),
                      itertools.product(alphabet, repeat=self.tok_len)):
        self.vocabulary.append(''.join(tok))
    self.params = uniform(0, 1, (self.mo * features_size, self.V))
    self.features = uniform(0, 1, (self.V, features_size))
    self.dictionary = {}
    for i in range(self.np, self.np + self.nc):
        # sample the definition length, sample the definition, store it in the
        # dictionary, then compute the composed token's features as a rescaled
        # mean of the features of the primes in its definition
        len_diff = self.max_len_def - self.min_len_def
        len_def = np.random.choice(len_diff) + self.min_len_def
        definition = np.random.choice(self.np, size=len_def, replace=False)
        tok = self.vocabulary[i]
        self.dictionary[tok] = [self.vocabulary[e] for e in definition]
        #factor = np.random.beta(a=3, b=2.5) # closer to 1 than 0
        #factor = np.random.beta(a=1, b=3) # closer to 0 than 1
        factor = 1  # 1/(8*self.nc)
        f = factor * np.mean([self.features[e] for e in definition], axis=0)
        self.features[i] = f
    self.initial_features = uniform(0, 1, (self.mo, features_size))
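A hedged usage sketch based only on the constructor and `proba` shown here; the class name `FakeTextGenerator`, the flattening of `initial_features`, and the presence of the module-level `softmax` helper are assumptions:

# Illustrative usage (assumes the class above is named FakeTextGenerator and
# that proba expects a flat feature vector of length markov_order * features_size).
import numpy as np

gen = FakeTextGenerator(n_primes=50, n_composed=20, features_size=10,
                        markov_order=2, temperature=1.0)
context = gen.initial_features.flatten()   # shape: (mo * features_size,)
p = gen.proba(context, gen.params)         # distribution over the V tokens
next_token = np.random.choice(gen.vocabulary, p=p)
print(next_token, p.shape)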
def inference(documents, doc_mask, query, query_mask):
    embedding = tf.get_variable('embedding',
        [FLAGS.vocab_size, FLAGS.embedding_size],
        initializer=tf.random_uniform_initializer(minval=-0.05, maxval=0.05))
    regularizer = tf.nn.l2_loss(embedding)

    doc_emb = tf.nn.dropout(tf.nn.embedding_lookup(embedding, documents), FLAGS.dropout_keep_prob)
    doc_emb.set_shape([None, None, FLAGS.embedding_size])

    query_emb = tf.nn.dropout(tf.nn.embedding_lookup(embedding, query), FLAGS.dropout_keep_prob)
    query_emb.set_shape([None, None, FLAGS.embedding_size])

    with tf.variable_scope('document', initializer=orthogonal_initializer()):
        fwd_cell = tf.contrib.rnn.GRUCell(FLAGS.hidden_size)
        back_cell = tf.contrib.rnn.GRUCell(FLAGS.hidden_size)

        doc_len = tf.reduce_sum(doc_mask, reduction_indices=1)
        h, _ = tf.nn.bidirectional_dynamic_rnn(
            fwd_cell, back_cell, doc_emb, sequence_length=tf.to_int64(doc_len), dtype=tf.float32)
        #h_doc = tf.nn.dropout(tf.concat(2, h), FLAGS.dropout_keep_prob)
        h_doc = tf.concat(h, 2)

    with tf.variable_scope('query', initializer=orthogonal_initializer()):
        fwd_cell = tf.contrib.rnn.GRUCell(FLAGS.hidden_size)
        back_cell = tf.contrib.rnn.GRUCell(FLAGS.hidden_size)

        query_len = tf.reduce_sum(query_mask, reduction_indices=1)
        h, _ = tf.nn.bidirectional_dynamic_rnn(
            fwd_cell, back_cell, query_emb, sequence_length=tf.to_int64(query_len), dtype=tf.float32)
        #h_query = tf.nn.dropout(tf.concat(2, h), FLAGS.dropout_keep_prob)
        h_query = tf.concat(h, 2)

    # pairwise matching scores between document and query positions
    M = tf.matmul(h_doc, h_query, adjoint_b=True)
    M_mask = tf.to_float(tf.matmul(tf.expand_dims(doc_mask, -1), tf.expand_dims(query_mask, 1)))

    # masked softmaxes: alpha over document positions, beta over query positions
    alpha = softmax(M, 1, M_mask)
    beta = softmax(M, 2, M_mask)

    #query_importance = tf.expand_dims(tf.reduce_mean(beta, reduction_indices=1), -1)
    query_importance = tf.expand_dims(tf.reduce_sum(beta, 1) / tf.to_float(tf.expand_dims(doc_len, -1)), -1)

    s = tf.squeeze(tf.matmul(alpha, query_importance), [2])

    # sum attention mass over identical word ids to get per-word scores
    unpacked_s = zip(tf.unstack(s, FLAGS.batch_size), tf.unstack(documents, FLAGS.batch_size))
    y_hat = tf.stack([tf.unsorted_segment_sum(attentions, sentence_ids, FLAGS.vocab_size)
                      for (attentions, sentence_ids) in unpacked_s])

    return y_hat, regularizer
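`inference` relies on a masked `softmax(logits, axis, mask)` helper that is not part of this listing. A hedged TF 1.x sketch of what such a helper typically looks like; the epsilon and exact behaviour are assumptions:

# Hypothetical sketch of the masked softmax used above (assumption):
# exponentiate along `axis` with max-subtraction for stability, zero out
# masked positions, then renormalize.
def softmax(logits, axis, mask):
    logits = logits - tf.reduce_max(logits, axis, keep_dims=True)
    exps = tf.exp(logits) * mask
    return exps / (tf.reduce_sum(exps, axis, keep_dims=True) + 1e-13)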