def __unpool(self, updates, mask, ksize=(1, 2, 2, 1), output_shape=None, feature_count=None, name=''):
    """Scatter pooled values back to the positions recorded in `mask`.

    Args:
        updates: 4-D tensor of pooled values, indexed here as
            (batch, height, width, features).
        mask: Tensor with the same shape as `updates` holding, for every
            value, its flattened position inside one output image. It is
            decoded as ``y = mask // (W * C)`` and ``x = (mask // C) % W``
            with ``W``/``C`` the output width/feature count, i.e. without a
            batch offset. Presumably the argmax output of
            `tf.nn.max_pool_with_argmax` -- confirm against the caller.
        ksize: Pooling window; only ``ksize[1]`` and ``ksize[2]`` are read, to
            derive the default output height and width.
        output_shape: Optional explicit shape of the result. Defaults to the
            input shape with height and width multiplied by `ksize`.
        feature_count: Optional size of the last output dimension, used only
            when `output_shape` is not given. Defaults to the input's.
        name: Name of the variable scope the ops are created in.

    Returns:
        The tensor produced by `tf.scatter_nd` with shape `output_shape`,
        holding the values of `updates` at the decoded positions.
    """
    with tf.variable_scope(name):
        mask = tf.cast(mask, tf.int32)
        input_shape = tf.shape(updates, out_type=tf.int32)

        # Derive the output shape when the caller did not provide one.
        if feature_count is None:
            feature_count = input_shape[3]
        if output_shape is None:
            # Keep the batch size of the input. A hard-coded batch of 1 makes
            # the reshape of `batch_range` below fail for larger batches.
            output_shape = (input_shape[0],
                            input_shape[1] * ksize[1],
                            input_shape[2] * ksize[2],
                            feature_count)
        output_shape = tf.cast(output_shape, tf.int32)

        # Build one index tensor per output dimension, each shaped like `mask`.
        one_like_mask = tf.ones_like(mask)  # int32, because `mask` is int32
        batch_shape = tf.concat([[input_shape[0]], [1], [1], [1]], 0)
        batch_range = tf.reshape(tf.range(output_shape[0], dtype=tf.int32), shape=batch_shape)
        b = one_like_mask * batch_range
        y = tf.floordiv(mask, output_shape[2] * output_shape[3])
        x = tf.mod(tf.floordiv(mask, output_shape[3]), output_shape[2])
        feature_range = tf.range(output_shape[3], dtype=tf.int32)
        f = one_like_mask * feature_range

        # Flatten to a list of (b, y, x, f) rows plus the matching flat values.
        updates_size = tf.size(updates)
        indices = tf.transpose(tf.reshape(tf.stack([b, y, x, f]), [4, updates_size]))
        values = tf.reshape(updates, [updates_size])
        return tf.scatter_nd(indices, values, output_shape)
# Example source code for Python's mod()
def phi(times, s, tau):
    """Return ``tf.mod(times - s, tau)`` divided by ``tau``.

    Args:
        times: Values to wrap.
        s: Offset subtracted from `times` before wrapping.
        tau: Period used both for the modulo and for the final division.

    Returns:
        The result of ``tf.div(tf.mod(times - s, tau), tau)``.
    """
    # A variant that wrapped the remainder a second time
    # (mod -> + tau -> mod) existed here; only a single mod is applied.
    shifted = times - s
    remainder = tf.mod(shifted, tau)
    return tf.div(remainder, tau)
def __init__(self, num_buckets, num_units_out, initializer=None, name=None,
             trainable=True, mod_inputs=True):
    """Sets up an embedding layer with `num_buckets` rows.

    Args:
      num_buckets: Number of buckets (rows) of the embedding.
      num_units_out: Number of output units (columns) of the embedding.
      initializer: Initializer for the weights. When omitted, uniform unit
        scaling is used. A Tensor or numpy array may be passed instead, in
        which case it supplies both the initial value and the shape of the
        weights; the weights stay trainable unless `trainable=False` is
        passed as well.
      name: Optional string name for the variable scope holding the layer's
        variables. Defaults to
        `'Embedding_%d_%d' % (num_buckets, num_units_out)`.
      trainable: Whether the weights are trainable.
      mod_inputs: Whether inputs are taken modulo the number of buckets.

    Raises:
      ValueError: If a Tensor/array initializer does not have the shape
        `(num_buckets, num_units_out)`.
    """
    # Must stay first: the arguments are recorded before any of them is
    # rebound below (NOTE(review): get_local_arguments presumably inspects
    # this frame -- confirm before moving).
    self.set_constructor_args('td.Embedding',
                              *get_local_arguments(Embedding.__init__, True))

    expected_shape = (num_buckets, num_units_out)
    self._weights_shape = expected_shape
    if name is None:
        name = 'Embedding_%d_%d' % expected_shape

    if initializer is None:
        initializer = tf.uniform_unit_scaling_initializer(1.0)
    elif isinstance(initializer, np.ndarray):
        initializer = tf.convert_to_tensor(initializer)

    if isinstance(initializer, tf.Tensor):
        # A concrete tensor already carries its shape; validate it and do not
        # pass an explicit shape on (otherwise get_variable complains).
        initializer.set_shape(expected_shape)
        self._weights_shape = None

    self._initializer = initializer
    self._num_buckets = num_buckets
    self._num_units_out = num_units_out
    self._trainable = trainable
    self._mod_inputs = bool(mod_inputs)
    super(Embedding, self).__init__(
        output_type=tdt.TensorType([num_units_out]), name_or_scope=name)
def time_error_loss(model_h, model_m, label_h, label_m):
    """Compute the time error (in minutes) of the current model.

    The total time difference is expressed in minutes:
        1/N sum( delta(PP, TT))
    where PP and TT are the predicted and true times, expressed in number of
    minutes. The delta operator takes care of 'wraparound', so that the
    difference between 9'58 and 10'02 is 4 minutes.

    The individual errors for hours and minutes are returned as well.

    :param model_h: hour classifier output; the argmax over axis 1 is taken
        as the predicted hour.
    :param model_m: minute classifier output; the argmax over axis 1 is taken
        as the predicted minute.
    :param label_h: true hours (cast to float32).
    :param label_m: true minutes (cast to float32).
    :return: mean errors for (combined, hours, minutes)
    """
    # Take the classifier argmax for the most likely hour/minute, and cast
    # everything to float32.
    hours_predicted = tf.cast(tf.argmax(model_h, 1), tf.float32)
    hours_true = tf.cast(label_h, tf.float32)
    minutes_predicted = tf.cast(tf.argmax(model_m, 1), tf.float32)
    minutes_true = tf.cast(label_m, tf.float32)

    # Plain operators are used instead of tf.sub/tf.add: tf.sub no longer
    # exists from TensorFlow 1.0 on, while the operators work everywhere.
    delta_time = ((60 * hours_predicted + minutes_predicted) -
                  (60 * hours_true + minutes_true))
    delta_hours = hours_predicted - hours_true
    delta_minutes = minutes_predicted - minutes_true

    def positive_mod(val, div):
        # Return the non-negative result of the modulo operator:
        #   x = ((v % div) + div) % div
        # This guards against mod implementations that return negative
        # values for negative inputs (-7 mod 3 = -1, where 2 is wanted).
        return tf.mod(tf.mod(val, div) + div, div)

    # Handle time wrapping around by comparing the mod of the positive and
    # negative time differences and keeping the smaller one.
    time_error_c = tf.minimum(positive_mod(delta_time, 720),
                              positive_mod(-1 * delta_time, 720))
    time_error_h = tf.minimum(positive_mod(delta_hours, 12.0),
                              positive_mod(-1 * delta_hours, 12.0))
    time_error_m = tf.minimum(positive_mod(delta_minutes, 60.0),
                              positive_mod(-1 * delta_minutes, 60.0))

    avg_error_c = tf.reduce_mean(time_error_c)
    avg_error_h = tf.reduce_mean(time_error_h)
    avg_error_m = tf.reduce_mean(time_error_m)
    return avg_error_c, avg_error_h, avg_error_m
def tfidf(x, vocab_size, smooth=True, name=None):
    """Maps the terms in x to their term frequency * inverse document frequency.

    By default the inverse document frequency of a term is
    1 + log((corpus size + 1) / (document frequency of term + 1)).

    Example usage:
      example strings [["I", "like", "pie", "pie", "pie"], ["yum", "yum", "pie"]]
      in: SparseTensor(indices=[[0, 0], [0, 1], [0, 2], [0, 3], [0, 4],
                                [1, 0], [1, 1], [1, 2]],
                       values=[1, 2, 0, 0, 0, 3, 3, 0])
      out: SparseTensor(indices=[[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]],
                        values=[1, 2, 0, 3, 0])
           SparseTensor(indices=[[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]],
                        values=[(1/5)*(log(3/2)+1), (1/5)*(log(3/2)+1), (1/5),
                                (1/3), (2/3)*(log(3/2)+1)])
      NOTE that the first doc's duplicate "pie" strings have been combined to
      one output, as have the second doc's duplicate "yum" strings.

    Args:
      x: A `SparseTensor` of int64 values (most likely the result of calling
        string_to_int on a tokenized string).
      vocab_size: An int - the size of the vocabulary used to turn the strings
        into int64s, including any OOV buckets.
      smooth: A bool selecting the smoothed idf. If True (the default) the idf
        is 1 + log((corpus size + 1) / (document frequency of term + 1)).
        Otherwise it is 1 + log((corpus size) / (document frequency of term)),
        which could result in a division by zero error.
      name: (Optional) A name for this operation.

    Returns:
      Two `SparseTensor`s with indices [index_in_batch, index_in_bag_of_words].
      The first has values vocab_index, which is taken from input `x`.
      The second has values tfidf_weight.
    """
    with tf.name_scope(name, 'tfidf'):
        # Wrap the ids modulo vocab_size so that the vocab ids are positive.
        cleaned_input = tf.SparseTensor(
            indices=x.indices,
            values=tf.mod(x.values, vocab_size),
            dense_shape=x.dense_shape)

        term_frequencies = _to_term_frequency(cleaned_input, vocab_size)
        docs_with_term = _count_docs_with_term(term_frequencies)

        # Expand dims to get around the min_tensor_rank checks.
        batch_size = tf.expand_dims(tf.shape(cleaned_input)[0], 0)

        document_frequency = analyzers.sum(docs_with_term,
                                           reduce_instance_dims=False)
        corpus_size = analyzers.sum(batch_size)

        # [batch, vocab] - tfidf
        weights = _to_tfidf(term_frequencies, document_frequency, corpus_size,
                            smooth)
        return _split_tfidfs_to_outputs(weights)
def sample(self, logits, log_probs, prev_finished, time):
    """Pick the next `beam_size` candidates per batch entry from `logits`.

    :param logits: [_batch_size * beam_size, vocab.vocab_size]
    :param log_probs: [_batch_size * beam_size,], log probabilities of the
        sequences decoded so far.
    :param prev_finished: [_batch_size * beam_size,], whether each beam is
        already finished.
    :param time: current decoding step; at step 0 only the first
        `vocab_size` candidates of every batch entry are considered.
    :return: (sample_ids, beam_ids, next_log_probs), each flattened to one
        dimension.
    """
    # [_batch_size * beam_size, target_vocab_size]
    step_log_probs = tf.nn.log_softmax(logits)

    # Per-word scale applied to finished beams: -1 at the EOS id (so the
    # factor below becomes 0) and float32 max everywhere else.
    finished_scale = [tf.float32.max] * self.vocab_size
    finished_scale[self.eos_id] = -1.
    finished_scale = tf.expand_dims(
        tf.constant(finished_scale, dtype=tf.float32), 0)
    finished_column = tf.expand_dims(tf.to_float(prev_finished), 1)
    masked_step = (finished_column * finished_scale + 1.) * step_log_probs

    # [_batch_size * beam_size, target_vocab_size]
    accumulated = masked_step + tf.expand_dims(log_probs, 1)

    # Regroup so every row holds all candidates of one batch entry.
    per_batch = tf.reshape(tf.reshape(accumulated, [-1]),
                           [self._batch_size, -1])
    candidates = tf.cond(
        tf.convert_to_tensor(time) > 0,
        lambda: per_batch,
        lambda: tf.slice(per_batch, [0, 0], [-1, self.vocab_size]))

    next_log_probs, flat_ids = tf.nn.top_k(candidates, k=self.beam_size)
    next_log_probs = tf.reshape(next_log_probs, [-1])
    flat_ids = tf.reshape(flat_ids, [-1])

    # A flat candidate id encodes (beam, word) as beam * vocab_size + word.
    sample_ids = tf.mod(flat_ids, self.vocab_size)

    # Beam ids are offset by batch_index * beam_size so that they index the
    # flattened [_batch_size * beam_size] dimension.
    tiled_batch = tf.tile([tf.range(self._batch_size)], [self.beam_size, 1])
    beam_add = tiled_batch * self.beam_size
    beam_ids = (tf.div(flat_ids, self.vocab_size)
                + tf.reshape(tf.transpose(beam_add), [-1]))
    return sample_ids, beam_ids, next_log_probs
def extract_features(inputs, k_idxs, map_h):
    """Extract the top-k fine features.

    For each of the N_PATCHES columns of `k_idxs`, the flat index is split
    into (idx // map_h, idx % map_h), a 14x14 patch is cut from `inputs` at
    the matching origin, and all patches are run through `fine_layers` in a
    single batch.

    NOTE: tf.image.extract_glimpse is deliberately not used to get the input
    patches (cf. https://github.com/tensorflow/tensorflow/issues/2134).

    Returns:
        (k_features, k_src_idxs, k_patches): the `fine_layers` output split
        into N_PATCHES pieces along dimension 0, and per patch the source
        (i, j) indices and the extracted patches.
    """
    def _patches_at(flat_idxs):
        flat_idxs = tf.expand_dims(flat_idxs, 1)
        row = tf.floordiv(flat_idxs, map_h)
        col = tf.mod(flat_idxs, map_h)
        # NOTE: the origins below are starting points, not centers!
        origin_row = 2*(2*row+1)+3 - 5 + 2
        origin_col = 2*(2*col+1)+3 - 5 + 2
        origins = tf.concat(1, [origin_row, origin_col])
        # NOTE: the patch size also depends on the architecture.
        patches = extract_patches(inputs, size=[14, 14], offsets=origins)
        source = tf.concat(1, [row, col])
        return source, patches

    k_src_idxs = []
    k_patches = []
    for i in xrange(N_PATCHES):
        source, patches = _patches_at(k_idxs[:, i])
        k_src_idxs.append(source)
        k_patches.append(patches)

    # Run the fine layers once on all patches, then split per patch again.
    all_patches = tf.concat(0, k_patches)
    all_features = fine_layers(all_patches)
    k_features = tf.split(0, N_PATCHES, all_features)
    return k_features, k_src_idxs, k_patches
def get_timing_signal_1d(length,
                         channels,
                         min_timescale=1.0,
                         max_timescale=1.0e4):
    """Gets a bunch of sinusoids of different frequencies.

    Each channel of the input Tensor is incremented by a sinusoid of a
    different frequency and phase.

    This allows attention to learn to use absolute and relative positions.
    Timing signals should be added to some precursors of both the query and
    the memory inputs to attention.

    The use of relative position is possible because sin(x+y) and cos(x+y) can
    be expressed in terms of y, sin(x) and cos(x).

    In particular, we use a geometric sequence of timescales starting with
    min_timescale and ending with max_timescale. The number of different
    timescales is equal to channels / 2. For each timescale, we generate the
    two sinusoidal signals sin(timestep/timescale) and
    cos(timestep/timescale). All of these sinusoids are concatenated in the
    channels dimension; an odd `channels` is padded with one zero channel.

    Args:
      length: scalar, length of timing signal sequence.
      channels: scalar, size of timing embeddings to create. The number of
        different timescales is equal to channels / 2.
      min_timescale: a float
      max_timescale: a float

    Returns:
      a Tensor of timing signals [1, length, channels]
    """
    position = tf.to_float(tf.range(length))
    num_timescales = channels // 2
    # With a single timescale (channels of 2 or 3) `num_timescales - 1` is 0;
    # clamp the denominator to 1 so the increment stays finite instead of
    # turning the whole signal into NaN.
    log_timescale_increment = (
        math.log(float(max_timescale) / float(min_timescale)) /
        tf.maximum(tf.to_float(num_timescales) - 1, 1))
    inv_timescales = min_timescale * tf.exp(
        tf.to_float(tf.range(num_timescales)) * -log_timescale_increment)
    scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
    signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
    # Pad the channel dimension by one when `channels` is odd.
    signal = tf.pad(signal, [[0, 0], [0, tf.mod(channels, 2)]])
    signal = tf.reshape(signal, [1, length, channels])
    return signal
def fast_dlstm(self, s_t, state_in, lstm, chunks, h_size):
    """Run `lstm` over `s_t` with tf.scan, feeding it one state chunk per step.

    The (c, h) state of width `h_size` is treated as `chunks` slices of width
    ``h_size // chunks``. At every step the cell receives only the slice
    selected by a step counter, the cell's new sub-state is merged back into
    the full state through `self.conditional_sub_state`, and the counter
    advances modulo `chunks`.

    Args:
        s_t: Input tensor; transposed with [1, 0, 2] before being scanned.
        state_in: Initial state exposing `.c` and unpackable as (c, h).
        lstm: Callable ``lstm(input, state) -> (output, new_state)``.
        chunks: Number of state slices.
        h_size: Total width of the state.

    Returns:
        (rnn_outputs, final_states) as produced by `tf.scan`.
    """
    chunk_width = h_size // chunks

    def slice_chunk(state, step):
        c, h = state
        start = step * chunk_width
        h_part = h[:, start: start + chunk_width]
        c_part = c[:, start: start + chunk_width]
        h_part.set_shape([1, chunk_width])
        c_part.set_shape([1, chunk_width])
        return tf.contrib.rnn.LSTMStateTuple(c_part, h_part)

    def merge_chunk(new_sub_state, previous_state, step):
        c_previous, h_previous = previous_state
        c_new, h_new = new_sub_state
        selector = tf.one_hot(step, depth=chunks)
        h_parts = []
        c_parts = []
        for chunk_idx, start in zip(range(chunks), range(0, h_size, chunk_width)):
            is_active = selector[chunk_idx]
            stop = start + chunk_width
            h_part = self.conditional_sub_state(
                is_active, h_new, h_previous[:, start:stop])
            h_part.set_shape([1, chunk_width])
            c_part = self.conditional_sub_state(
                is_active, c_new, c_previous[:, start:stop])
            c_part.set_shape([1, chunk_width])
            h_parts.append(h_part)
            c_parts.append(c_part)
        h_merged = tf.concat(h_parts, axis=1)
        c_merged = tf.concat(c_parts, axis=1)
        return tf.contrib.rnn.LSTMStateTuple(c_merged, h_merged)

    def scan_step(carry, current_input):
        # carry is (previous output, full state, index of the active chunk).
        full_state = carry[1]
        step = carry[2]
        sub_state = slice_chunk(full_state, step)
        out, sub_state_out = lstm(current_input, sub_state)
        state_out = merge_chunk(sub_state_out, full_state, step)
        next_step = tf.mod(step + tf.constant(1), chunks)
        return out, state_out, next_step

    # The first slot of the initializer only fixes the structure of the
    # per-step output; a slice of the initial cell state is used for it.
    initial_output = state_in.c[:, 0: chunk_width]
    rnn_outputs, final_states, _ = tf.scan(
        scan_step,
        tf.transpose(s_t, [1, 0, 2]),
        initializer=(initial_output, state_in, tf.constant(0)),
        name="dlstm")
    return rnn_outputs, final_states
def test_copy_from_works_with_control_flow(self):
    """Checks that `copy_from` only runs when its `tf.cond` predicate holds."""
    def graph_fn1(mode, x):
        return plx.layers.Dense(units=1)(x)

    def graph_fn2(mode, x):
        return plx.layers.Dense(units=1, trainable=False)(x)

    source = plx.FunctionModule(mode=plx.Modes.TRAIN, build_fn=graph_fn1, name='fn1')
    target = plx.FunctionModule(mode=plx.Modes.TRAIN, build_fn=graph_fn2, name='fn2')

    x = tf.placeholder(dtype=tf.float32, shape=[1, 1])
    source_out = source(x)
    target_out = target(x)

    init_all_op = tf.global_variables_initializer()

    def copy():
        # The copy op has to be created inside a function; otherwise it would
        # always be evaluated, whatever the condition is.
        return target.copy_from(source, tf.GraphKeys.GLOBAL_VARIABLES)

    a = tf.placeholder(tf.int32, ())
    cond = tf.cond(tf.equal(tf.mod(a, 5), 0), copy, lambda: tf.no_op())
    assign_op = source.get_variables()[0].assign_add([[1]])
    group_op = tf.group(*[assign_op, cond])

    def eval_both():
        # Evaluate both modules on the same input in the default session.
        first = source_out.eval({x: [[1]]})
        second = target_out.eval({x: [[1]]})
        return first, second

    with self.test_session() as sess:
        sess.run(init_all_op)

        # Initially the two modules have different values.
        first, second = eval_both()
        assert first[0] != second[0]

        # Condition is True: 10 % 5 == 0, so the copy happens.
        sess.run(cond, feed_dict={a: 10})
        first, second = eval_both()
        assert first[0] == second[0]

        # Assign and set the condition to False: 2 % 5 != 0, so no copy.
        sess.run(group_op, feed_dict={a: 2})
        first, second = eval_both()
        assert first[0] != second[0]