# The snippets in this file assume the usual aliases from their source
# projects: `import numpy as np`, `import tensorflow as tf` (TF 1.x APIs such
# as tf.to_float), and `import tensorflow.contrib.slim as slim`.
def ae(x):
    # `nonlinearity_name`, `W`, and `b` (and `is_training` in the commented-out
    # variant) are defined in the enclosing scope.
    if nonlinearity_name == 'relu':
        f = tf.nn.relu
    elif nonlinearity_name == 'elu':
        f = tf.nn.elu
    elif nonlinearity_name == 'gelu':
        # def gelu(x):
        #     return tf.multiply(x, tf.erfc(-x / tf.sqrt(2.)) / 2.)
        # f = gelu
        def gelu_fast(_x):
            return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (_x + 0.044715 * tf.pow(_x, 3))))
        f = gelu_fast
    elif nonlinearity_name == 'silu':
        def silu(_x):
            return _x * tf.sigmoid(_x)
        f = silu
    # elif nonlinearity_name == 'soi':
    #     def soi_map(x):
    #         u = tf.random_uniform(tf.shape(x))
    #         mask = tf.to_float(tf.less(u, (1 + tf.erf(x / tf.sqrt(2.))) / 2.))
    #         return tf.cond(is_training, lambda: tf.multiply(mask, x),
    #                        lambda: tf.multiply(x, tf.erfc(-x / tf.sqrt(2.)) / 2.))
    #     f = soi_map
    else:
        raise NameError("Need 'relu', 'elu', 'gelu', or 'silu' for nonlinearity_name")

    h1 = f(tf.matmul(x, W['1']) + b['1'])
    h2 = f(tf.matmul(h1, W['2']) + b['2'])
    h3 = f(tf.matmul(h2, W['3']) + b['3'])
    h4 = f(tf.matmul(h3, W['4']) + b['4'])
    h5 = f(tf.matmul(h4, W['5']) + b['5'])
    h6 = f(tf.matmul(h5, W['6']) + b['6'])
    h7 = f(tf.matmul(h6, W['7']) + b['7'])
    return tf.matmul(h7, W['8']) + b['8']
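The `gelu_fast` tanh form above approximates the exact erf-based GELU that is commented out. A quick NumPy check (illustrative, not part of the original snippet; uses SciPy's erf) shows the two agree to within about 1e-3:

import numpy as np
from scipy.special import erf

def gelu_exact(x):
    # Exact GELU: x * Phi(x), with Phi the standard normal CDF.
    return x * 0.5 * (1.0 + erf(x / np.sqrt(2.0)))

def gelu_tanh(x):
    # The tanh approximation used in gelu_fast above.
    return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x ** 3)))

xs = np.linspace(-5.0, 5.0, 101)
assert np.max(np.abs(gelu_exact(xs) - gelu_tanh(xs))) < 1e-3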
def compute_loss(self, decoder_output, _features, labels):
    """Computes the loss for this model.

    Returns a tuple `(losses, loss)`, where `losses` are the per-batch
    losses and `loss` is a single scalar tensor to minimize.
    """
    #pylint: disable=R0201
    # Calculate loss per example-timestep of shape [B, T]
    losses = seq2seq_losses.cross_entropy_sequence_loss(
        logits=decoder_output.logits[:, :, :],
        targets=tf.transpose(labels["target_ids"][:, 1:], [1, 0]),
        sequence_length=labels["target_len"] - 1)
    # Calculate the average log perplexity
    loss = tf.reduce_sum(losses) / tf.to_float(
        tf.reduce_sum(labels["target_len"] - 1))
    return losses, loss
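Since `loss` is the total cross-entropy divided by the total number of target tokens, exponentiating it gives a per-token perplexity. A hedged usage sketch (`model`, `features` are hypothetical names):

losses, loss = model.compute_loss(decoder_output, features, labels)
perplexity = tf.exp(loss)  # average per-token perplexity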
def encode(self, inputs):
    inputs = tf.image.resize_images(
        images=inputs,
        size=[self.params["resize_height"], self.params["resize_width"]],
        method=tf.image.ResizeMethod.BILINEAR)
    outputs, _ = inception_v3_base(tf.to_float(inputs))
    output_shape = outputs.get_shape()  #pylint: disable=E1101
    shape_list = output_shape.as_list()
    # Take attention over output elements in the width and height dimensions.
    # Shape: [B, W*H, ...]
    outputs_flat = tf.reshape(outputs, [shape_list[0], -1, shape_list[-1]])
    # Final state is the pooled output
    # Shape: [B, W*H*...]
    final_state = tf.contrib.slim.avg_pool2d(
        outputs, output_shape[1:3], padding="VALID", scope="pool")
    # Flatten the pooled result (flattening `outputs` here would discard the pooling)
    final_state = tf.contrib.slim.flatten(final_state, scope="flatten")
    return EncoderOutput(
        outputs=outputs_flat,
        final_state=final_state,
        attention_values=outputs_flat,
        attention_values_length=tf.shape(outputs_flat)[1])
def _add_mh_correction(self, initial_position, initial_velocity, final_position, final_velocity):
    """ Applies MH accept/reject correction. """
    initial_energy = self._hamiltonian(initial_position, initial_velocity)
    final_energy = self._hamiltonian(final_position, final_velocity)
    accepted = self._metropolis_hastings_accept(initial_energy, final_energy)
    accepted = tf.to_float(accepted)

    # add acceptance to fetched values
    self._accepted = accepted

    if self.seek_step_sizes or self.fade_in_velocities:
        # tf.equal rather than Python `==` so the comparison stays in the graph
        burned_in = tf.to_float(tf.equal(self._burn_in_ratio, 1))
        accepted = accepted * burned_in + tf.ones(shape=tf.shape(accepted)) * (1 - burned_in)

    # apply MH decision
    final_position = self._transpose_mul(final_position, accepted) + \
        self._transpose_mul(initial_position, tf.ones(shape=tf.shape(accepted)) - accepted)
    final_velocity = self._transpose_mul(final_velocity, accepted) + \
        self._transpose_mul(-initial_velocity, tf.ones(shape=tf.shape(accepted)) - accepted)
    return final_position, final_velocity
def mu_law_encode_nonlinear(audio, quantization_channels=256):
    '''
    Compress the waveform amplitudes using mu-law non-linearity.
    NOTE: This mu-law functions as a non-linear function, as opposed to
    quantization.
    '''
    with tf.name_scope('encode'):
        mu = tf.to_float(quantization_channels - 1)
        # Perform mu-law companding transformation (ITU-T, 1988).
        # Minimum operation is here to deal with rare large amplitudes caused
        # by resampling.
        safe_audio_abs = tf.minimum(tf.abs(audio), 1.0)
        magnitude = tf.log1p(mu * safe_audio_abs) / tf.log1p(mu)
        signal = tf.multiply(tf.sign(audio), magnitude, name='mulaw')
        # Quantize signal to the specified number of levels:
        # return tf.to_int32((signal + 1) / 2 * mu + 0.5)
        return signal
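A companion inverse (mu-law expansion) is sketched below; it is not part of the original snippet, just the algebraic inverse of the companding above:

def mu_law_decode_nonlinear(signal, quantization_channels=256):
    '''Invert the mu-law companding above.'''
    with tf.name_scope('decode'):
        mu = tf.to_float(quantization_channels - 1)
        # magnitude = log1p(mu*|x|)/log1p(mu)  =>  |x| = ((1+mu)**|y| - 1)/mu
        magnitude = (tf.pow(1. + mu, tf.abs(signal)) - 1.) / mu
        return tf.sign(signal) * magnitude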
def _crop_pool_layer(self, bottom, rois, name):
    with tf.variable_scope(name) as scope:
        batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])
        # Get the normalized coordinates of bboxes
        bottom_shape = tf.shape(bottom)
        height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(self._feat_stride[0])
        width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(self._feat_stride[0])
        x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
        y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
        x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
        y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height
        # Won't be back-propagated to rois anyway, but to save time
        bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], 1))
        if cfg.RESNET.MAX_POOL:
            pre_pool_size = cfg.POOLING_SIZE * 2
            crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids),
                                             [pre_pool_size, pre_pool_size], name="crops")
            crops = slim.max_pool2d(crops, [2, 2], padding='SAME')
        else:
            crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids),
                                             [cfg.POOLING_SIZE, cfg.POOLING_SIZE], name="crops")
        return crops
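For reference, `tf.image.crop_and_resize` takes boxes as `[y1, x1, y2, x2]` normalized to `[0, 1]`, which is why the ROI pixel coordinates above are divided by the feature map's extent in input-image pixels. A toy check of the semantics (illustrative values only):

img = tf.reshape(tf.to_float(tf.range(16)), [1, 4, 4, 1])
boxes = tf.constant([[0.0, 0.0, 1.0, 1.0]])          # the full image
crop = tf.image.crop_and_resize(img, boxes, tf.constant([0]), [2, 2])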
# Do the first few layers manually, because 'SAME' padding can behave inconsistently
# for images of different sizes: sometimes 0, sometimes 1
def _smooth_l1_loss(self, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, sigma=1.0, dim=[1]):
    sigma_2 = sigma ** 2
    box_diff = bbox_pred - bbox_targets
    in_box_diff = bbox_inside_weights * box_diff
    abs_in_box_diff = tf.abs(in_box_diff)
    smoothL1_sign = tf.stop_gradient(tf.to_float(tf.less(abs_in_box_diff, 1. / sigma_2)))
    in_loss_box = tf.pow(in_box_diff, 2) * (sigma_2 / 2.) * smoothL1_sign \
                  + (abs_in_box_diff - (0.5 / sigma_2)) * (1. - smoothL1_sign)
    out_loss_box = bbox_outside_weights * in_loss_box
    loss_box = tf.reduce_mean(tf.reduce_sum(
        out_loss_box,
        axis=dim
    ))
    return loss_box
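A scalar NumPy reference of the same piecewise term, handy for checking the masked TF expression above (names are illustrative):

import numpy as np

def smooth_l1_reference(diff, sigma=1.0):
    sigma_2 = sigma ** 2
    abs_diff = np.abs(diff)
    # Quadratic inside |diff| < 1/sigma^2, linear outside; this matches the
    # two smoothL1_sign branches above.
    return np.where(abs_diff < 1.0 / sigma_2,
                    0.5 * sigma_2 * diff ** 2,
                    abs_diff - 0.5 / sigma_2)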
def _crop_pool_layer(self, bottom, rois, name):
    with tf.variable_scope(name) as scope:
        batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])
        # Get the normalized coordinates of bounding boxes
        bottom_shape = tf.shape(bottom)
        height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(self._feat_stride[0])
        width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(self._feat_stride[0])
        x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
        y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
        x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
        y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height
        # Won't be back-propagated to rois anyway, but to save time
        bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], axis=1))
        pre_pool_size = cfg.POOLING_SIZE * 2
        crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids),
                                         [pre_pool_size, pre_pool_size], name="crops")
        return slim.max_pool2d(crops, [2, 2], padding='SAME')
def get_label_queue(self, batch_size):
    tf_labels = tf.convert_to_tensor(self.attr.values, dtype=tf.uint8)  # 0, 1
    with tf.name_scope('label_queue'):
        uint_label = tf.train.slice_input_producer([tf_labels])[0]
        label = tf.to_float(uint_label)

        # All labels, not just those in causal_model
        dict_data = {sl: tl for sl, tl in
                     zip(self.label_names, tf.split(label, len(self.label_names)))}

        num_preprocess_threads = max(self.num_worker - 3, 1)
        data_batch = tf.train.shuffle_batch(
            dict_data,
            batch_size=batch_size,
            num_threads=num_preprocess_threads,
            capacity=self.min_queue_examples + 3 * batch_size,
            min_after_dequeue=self.min_queue_examples,
        )
    return data_batch
def l2_loss(tensor, weight=1.0, scope=None, normalize=False):
    """Define an L2 loss, useful for regularization, i.e. weight decay.

    Args:
      tensor: tensor to regularize.
      weight: an optional weight to modulate the loss.
      scope: Optional scope for op_scope.
      normalize: if True, normalize the loss by the tensor size instead of
        scaling it by `weight`.

    Returns:
      the L2 loss op.
    """
    with tf.op_scope([tensor], scope, 'L2Loss'):
        weight = tf.convert_to_tensor(weight,
                                      dtype=tensor.dtype.base_dtype,
                                      name='loss_weight')
        if normalize:
            loss = tf.sqrt(tf.sqrt(tf.nn.l2_loss(tensor)) / tf.to_float(tf.size(tensor)), name='value')
        else:
            loss = tf.multiply(weight, tf.nn.l2_loss(tensor), name='value')
        tf.add_to_collection(LOSSES_COLLECTION, loss)
        return loss
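A hedged usage sketch; `LOSSES_COLLECTION` is the module-level collection key the function adds to, and the variable name is illustrative:

w = tf.get_variable('w', shape=[128, 64])
_ = l2_loss(w, weight=1e-4, scope='weight_decay')
reg_total = tf.add_n(tf.get_collection(LOSSES_COLLECTION))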
def smoothing_cross_entropy(self, logits, labels, vocab_size, confidence=0.9):
    """Cross entropy with label smoothing to limit over-confidence.

    confidence = 1.0 - label_smoothing, with label_smoothing = 0.1.
    Adapted from http://github.com/tensorflow/tensor2tensor.
    """
    with tf.name_scope("smoothing_cross_entropy", values=[logits, labels]):
        # Low confidence is given to all non-true labels, uniformly.
        low_confidence = (1.0 - confidence) / tf.to_float(vocab_size - 1)
        # Normalizing constant is the best cross-entropy value with soft targets.
        # We subtract it just for readability; it makes no difference to learning.
        normalizing = -(confidence * tf.log(confidence) +
                        tf.to_float(vocab_size - 1) * low_confidence *
                        tf.log(low_confidence + 1e-20))
        # Soft targets.
        soft_targets = tf.one_hot(
            tf.cast(labels, tf.int32),
            depth=vocab_size,
            on_value=confidence,
            off_value=low_confidence)
        xentropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=soft_targets)
        return xentropy - normalizing
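Illustrative numbers only: with vocab_size=5 and confidence=0.9, the smoothed target still sums to 1:

low_confidence = (1.0 - 0.9) / (5 - 1)   # 0.025
soft_target = [low_confidence] * 5
soft_target[2] = 0.9                     # true class 2 -> [0.025, 0.025, 0.9, 0.025, 0.025]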
def build_loss(self, inp, output):
    y_gt = inp['y_gt']
    y_out = output['y_out']
    ce = tfplus.nn.CE()({'y_gt': y_gt, 'y_out': y_out})
    num_ex_f = tf.to_float(tf.shape(inp['x'])[0])
    ce = tf.reduce_sum(ce) / num_ex_f
    self.add_loss(ce)
    total_loss = self.get_loss()
    self.register_var('loss', total_loss)
    ans = tf.argmax(y_gt, 1)
    correct = tf.equal(ans, tf.argmax(y_out, 1))
    top5_acc = tf.reduce_sum(tf.to_float(
        tf.nn.in_top_k(y_out, ans, 5))) / num_ex_f
    self.register_var('top5_acc', top5_acc)
    acc = tf.reduce_sum(tf.to_float(correct)) / num_ex_f
    self.register_var('acc', acc)
    return total_loss
def _score(self, prev_decoder_state, prev_embedding):
    # Returns scores in a tensor of shape [batch_size, input_sequence_length]
    if self.mode == 'decode':
        query_part = self.query_attention_partial_score_placeholder
        encoder_part = self.encoder_state_attention_partial_scores_placeholder
    else:
        query_part = self.query_attention_partial_score
        encoder_part = self.encoder_state_attention_partial_scores

    embedding_part = tf.matmul(prev_embedding, self.attention_w_e)

    output = tf.matmul(prev_decoder_state,
                       self.attention_w) + embedding_part + query_part + encoder_part + self.attention_b
    output = tf.tanh(output)
    output = tf.reduce_sum(self.attention_v * output, axis=2)
    output = tf.transpose(output, [1, 0])

    # Handle input document padding by giving a large penalty, eliminating it from the weighted average
    padding_penalty = -1e20 * tf.to_float(1 - tf.sign(self.documents_placeholder))
    masked = output + padding_penalty
    return masked
def cyclic_learning_rate(
        learning_rate_min,
        learning_rate_max,
        step_size,
        global_step,
        mode='triangular',
        scope=None):
    with tf.variable_scope(scope, 'CyclicLearningRate'):
        cycle = tf.floor(1 + tf.to_float(global_step) / (2 * step_size))
        if mode == 'triangular':
            scale = 1
        elif mode == 'triangular2':
            scale = 2 ** (cycle - 1)
        else:
            raise ValueError('Unrecognized mode: {}'.format(mode))
        x = tf.abs(tf.to_float(global_step) / step_size - 2 * cycle + 1)
        lr = learning_rate_min + (learning_rate_max - learning_rate_min) * \
            tf.maximum(0.0, 1 - x) / scale
        return lr
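A hedged usage sketch (the optimizer choice and hyperparameters are illustrative):

global_step = tf.train.get_or_create_global_step()
lr = cyclic_learning_rate(learning_rate_min=1e-4,
                          learning_rate_max=1e-2,
                          step_size=2000,
                          global_step=global_step,
                          mode='triangular')
opt = tf.train.MomentumOptimizer(lr, momentum=0.9)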
def ar_layer(z0, hps, n_hidden=10):
    ''' old iaf layer '''
    # Repeat input
    z_rep = tf.reshape(tf.tile(z0, [1, hps.z_size]), [-1, hps.z_size])
    # Make the autoregressive mask
    mask = tf.sequence_mask(tf.range(hps.z_size), hps.z_size)[None, :, :]
    mask = tf.reshape(tf.tile(mask, [tf.shape(z0)[0], 1, 1]), [-1, hps.z_size])
    # Predict mu and sigma
    z_mask = z_rep * tf.to_float(mask)
    mid = slim.fully_connected(z_mask, n_hidden, activation_fn=tf.nn.relu)
    pars = slim.fully_connected(mid, 2, activation_fn=None)
    pars = tf.reshape(pars, [-1, hps.z_size, 2])
    mu, log_sigma = tf.unstack(pars, axis=2)
    return mu, log_sigma
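To see the autoregressive structure of the mask above: for hps.z_size = 3, the i-th replica of z0 only sees coordinates j < i.

mask = tf.sequence_mask(tf.range(3), 3)   # rows: [F,F,F], [T,F,F], [T,T,F]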
def shrink_soft_threshold(r, rvar, theta):
    """
    Soft threshold function
        y = sign(x) * max(0, abs(x) - theta[0] * sqrt(rvar)) * scaling
    where scaling is theta[1] (default = 1).
    In other words, if theta has length 1, the standard soft threshold is
    applied with no extra scaling.
    """
    if len(theta.get_shape()) > 0 and theta.get_shape() != (1,):
        lam = theta[0] * tf.sqrt(rvar)
        scale = theta[1]
    else:
        lam = theta * tf.sqrt(rvar)
        scale = None
    lam = tf.maximum(lam, 0)
    arml = tf.abs(r) - lam
    xhat = tf.sign(r) * tf.maximum(arml, 0)
    dxdr = tf.reduce_mean(tf.to_float(arml > 0), 0)
    if scale is not None:
        xhat = xhat * scale
        dxdr = dxdr * scale
    return (xhat, dxdr)
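A NumPy cross-check of the scalar-theta branch above (values are illustrative):

import numpy as np
r = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
lam = 1.0
xhat = np.sign(r) * np.maximum(np.abs(r) - lam, 0)   # [-1., 0., 0., 0., 1.]
dxdr = np.mean((np.abs(r) - lam) > 0)                # 0.4, the fraction of active entries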
def shrink_spline(r, rvar, theta):
    """ Spline-based shrinkage function """
    scale = theta[0] * tf.sqrt(rvar)
    rs = tf.sign(r)
    ar = tf.abs(r / scale)
    ar2 = tf.square(ar)
    ar3 = ar * ar2
    reg1 = tf.to_float(ar < 1)
    reg2 = tf.to_float(ar < 2) - reg1
    ar_m2 = 2 - ar
    ar_m2_p2 = tf.square(ar_m2)
    ar_m2_p3 = ar_m2 * ar_m2_p2
    beta3 = ((2. / 3 - ar2 + .5 * ar3) * reg1 + (1. / 6 * ar_m2_p3) * reg2)
    xhat = r * (theta[1] + theta[2] * beta3)
    return (xhat, auto_gradients(xhat, r))
def _embed_sentences(self):
    """Tensorflow implementation of Simple but Tough-to-Beat Baseline"""
    # Get word features
    word_embeddings = self._get_embedding()
    word_feats = tf.nn.embedding_lookup(word_embeddings, self.input)
    # Get marginal estimates and scaling term
    batch_size = tf.shape(word_feats)[0]
    a = tf.pow(10.0, self._get_a_exp())
    p = tf.constant(self.marginals, dtype=tf.float32, name='marginals')
    q = tf.reshape(
        a / (a + tf.nn.embedding_lookup(p, self.input)),
        (batch_size, self.mx_len, 1)
    )
    # Compute initial sentence embedding
    z = tf.reshape(1.0 / tf.to_float(self.input_lengths), (batch_size, 1))
    S = z * tf.reduce_sum(q * word_feats, axis=1)
    # Compute common component
    S_centered = S - tf.reduce_mean(S, axis=0)
    _, _, V = tf.svd(S_centered, full_matrices=False, compute_uv=True)
    self.tf_ccx = tf.stop_gradient(tf.gather(tf.transpose(V), 0))
    # Common component removal
    ccx = tf.reshape(self._get_common_component(), (1, self.d))
    sv = {'embeddings': word_embeddings, 'a': a, 'p': p, 'ccx': ccx}
    return S - tf.matmul(S, ccx * tf.transpose(ccx)), sv
def rickerWavelet(scale, sampleCount):
    def waveEquation(time):
        time = tf.to_float(time)

        tSquare = time ** 2.
        sigma = 1.
        sSquare = sigma ** 2.

        # _1 = 2 / ((3 * a) ** .5 * np.pi ** .25)
        _1a = (3. * sigma) ** .5
        _1b = np.pi ** .25
        _1 = 2. / (_1a * _1b)

        # _2 = 1 - t**2 / a**2
        _2 = 1. - tSquare / sSquare

        # _3 = np.exp(-(t**2) / (2 * a ** 2))
        _3a = -1. * tSquare
        _3b = 2. * sSquare
        _3 = tf.exp(_3a / _3b)

        return _1 * _2 * _3

    return waveletHelper(scale, sampleCount, waveEquation)
def process_image(img, scale, isotropic, crop, mean):
    '''Crops, scales, and normalizes the given image.
    scale : The image will first be scaled to this size.
            If isotropic is true, the smaller side is rescaled to this,
            preserving the aspect ratio.
    crop  : After scaling, a central crop of this size is taken.
    mean  : Subtracted from the image.
    '''
    # Rescale
    if isotropic:
        img_shape = tf.to_float(tf.shape(img)[:2])
        min_length = tf.minimum(img_shape[0], img_shape[1])
        new_shape = tf.to_int32((scale / min_length) * img_shape)
    else:
        new_shape = tf.stack([scale, scale])
    img = tf.image.resize_images(img, new_shape)
    # Center crop
    # Use the slice workaround until crop_to_bounding_box supports deferred tensor shapes
    # See: https://github.com/tensorflow/tensorflow/issues/521
    offset = (new_shape - crop) // 2
    img = tf.slice(img, begin=tf.stack([offset[0], offset[1], 0]), size=tf.stack([crop, crop, -1]))
    # Mean subtraction
    return tf.to_float(img) - mean
def create_test_input(batch_size, height, width, channels):
    """Create test input tensor.

    Args:
      batch_size: The number of images per batch or `None` if unknown.
      height: The height of each image or `None` if unknown.
      width: The width of each image or `None` if unknown.
      channels: The number of channels per image or `None` if unknown.

    Returns:
      Either a placeholder `Tensor` of dimension
        [batch_size, height, width, channels] if any of the inputs are `None` or a
        constant `Tensor` with the mesh grid values along the spatial dimensions.
    """
    if None in [batch_size, height, width, channels]:
        return tf.placeholder(tf.float32, (batch_size, height, width, channels))
    else:
        return tf.to_float(
            np.tile(
                np.reshape(
                    np.reshape(np.arange(height), [height, 1]) +
                    np.reshape(np.arange(width), [1, width]),
                    [1, height, width, 1]),
                [batch_size, 1, 1, channels]))
def preprocess_for_eval(image, output_height, output_width, resize_side):
    """Preprocesses the given image for evaluation.

    Args:
      image: A `Tensor` representing an image of arbitrary size.
      output_height: The height of the image after preprocessing.
      output_width: The width of the image after preprocessing.
      resize_side: The smallest side of the image for aspect-preserving resizing.

    Returns:
      A preprocessed image.
    """
    image = _aspect_preserving_resize(image, resize_side)
    image = _central_crop([image], output_height, output_width)[0]
    image.set_shape([output_height, output_width, 3])
    image = tf.to_float(image)
    return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
def extract_batch(dataset, config):
    with tf.device("/cpu:0"):
        bboxer = PriorBoxGrid(config)
        data_provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset, num_readers=2,
            common_queue_capacity=512, common_queue_min=32)
        if args.segment:
            im, bbox, gt, seg = data_provider.get(['image', 'object/bbox', 'object/label',
                                                   'image/segmentation'])
        else:
            im, bbox, gt = data_provider.get(['image', 'object/bbox', 'object/label'])
            seg = tf.expand_dims(tf.zeros(tf.shape(im)[:2]), 2)

        im = tf.to_float(im) / 255
        bbox = yxyx_to_xywh(tf.clip_by_value(bbox, 0.0, 1.0))

        im, bbox, gt, seg = data_augmentation(im, bbox, gt, seg, config)
        inds, cats, refine = bboxer.encode_gt_tf(bbox, gt)

        return tf.train.shuffle_batch([im, inds, refine, cats, seg],
                                      args.batch_size, 2048, 64, num_threads=4)
def zoomout(image, gt_bboxes, params):
    X_out = tf.random_uniform([], 1.05, params['X_out'])
    h, w, _ = tf.unstack(tf.to_float(tf.shape(image)))
    zoomout_color = params['zoomout_color'] + [0]
    bg_color = tf.constant(zoomout_color, dtype=tf.float32)
    x_shift = tf.random_uniform([], 0, (X_out - 1) * w)
    y_shift = tf.random_uniform([], 0, (X_out - 1) * h)
    x2_shift = (X_out - 1) * w - x_shift
    y2_shift = (X_out - 1) * h - y_shift
    # Somewhat hacky solution to pad with MEAN_COLOR:
    # tf.pad does not support custom constant padding, unlike numpy
    image -= bg_color
    image = tf.pad(image, tf.to_int32([[y_shift, y2_shift], [x_shift, x2_shift], [0, 0]]))
    image += bg_color

    gt_x, gt_y, gt_w, gt_h = tf.unstack(gt_bboxes, axis=1)
    gt_bboxes = tf.stack([gt_x + x_shift / w,
                          gt_y + y_shift / h,
                          gt_w, gt_h], axis=1) / X_out
    return image, gt_bboxes
def __init__(self, epsilon=1e-2, shape=()):
    self._sum = tf.get_variable(
        dtype=tf.float64,
        shape=shape,
        initializer=tf.constant_initializer(0.0),
        name="runningsum", trainable=False)
    self._sumsq = tf.get_variable(
        dtype=tf.float64,
        shape=shape,
        initializer=tf.constant_initializer(epsilon),
        name="runningsumsq", trainable=False)
    self._count = tf.get_variable(
        dtype=tf.float64,
        shape=(),
        initializer=tf.constant_initializer(epsilon),
        name="count", trainable=False)
    self.shape = shape

    self.mean = tf.to_float(self._sum / self._count)
    self.std = tf.sqrt(tf.maximum(tf.to_float(self._sumsq / self._count) - tf.square(self.mean), 1e-2))

    newsum = tf.placeholder(shape=self.shape, dtype=tf.float64, name='sum')
    newsumsq = tf.placeholder(shape=self.shape, dtype=tf.float64, name='var')
    newcount = tf.placeholder(shape=[], dtype=tf.float64, name='count')
    self.incfiltparams = U.function([newsum, newsumsq, newcount], [],
                                    updates=[tf.assign_add(self._sum, newsum),
                                             tf.assign_add(self._sumsq, newsumsq),
                                             tf.assign_add(self._count, newcount)])
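A hedged usage sketch: `rms` is a hypothetical instance built with shape=(4,), and the update pushes a batch's sufficient statistics through the `U.function` defined above.

x = np.random.randn(64, 4)               # batch of observations
rms.incfiltparams(x.sum(axis=0),          # newsum
                  np.square(x).sum(axis=0),  # newsumsq
                  np.float64(len(x)))     # newcount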
def load_image(image_file, image_size=None):
    """Loads an image and center-crops it to a specific size.

    Args:
      image_file: str. Image file.
      image_size: int, optional. Desired size. If provided, crops the image to
        a square and resizes it to the requested size. Defaults to None.

    Returns:
      A 4-D tensor of shape [1, image_size, image_size, 3] and dtype float32,
      with values in [0, 1].
    """
    image = tf.constant(np.uint8(load_np_image(image_file) * 255.0))
    if image_size is not None:
        # Center-crop into a square and resize to image_size
        small_side = min(image.get_shape()[0].value, image.get_shape()[1].value)
        image = tf.image.resize_image_with_crop_or_pad(
            image, small_side, small_side)
        image = tf.image.resize_images(image, [image_size, image_size])
    image = tf.to_float(image) / 255.0
    return tf.expand_dims(image, 0)
def center_crop_resize_image(image, image_size):
    """Center-crop into a square and resize to image_size.

    Args:
      image: A 3-D image `Tensor`.
      image_size: int, Desired size. Crops the image to a square and resizes it
        to the requested size.

    Returns:
      A 4-D tensor of shape [1, image_size, image_size, 3] and dtype float32,
      with values in [0, 1].
    """
    shape = tf.shape(image)
    small_side = tf.minimum(shape[0], shape[1])
    image = tf.image.resize_image_with_crop_or_pad(image, small_side, small_side)
    image = tf.to_float(image) / 255.0
    image = tf.image.resize_images(image, tf.constant([image_size, image_size]))
    return tf.expand_dims(image, 0)
def encode(self, sequence, sequence_length):
    """Encodes input sequences into a MultivariateNormalDiag distribution."""
    hparams = self.hparams
    z_size = hparams.z_size

    sequence = tf.to_float(sequence)
    encoder_output = self.encoder.encode(sequence, sequence_length)

    mu = tf.layers.dense(
        encoder_output,
        z_size,
        name='encoder/mu',
        kernel_initializer=tf.random_normal_initializer(stddev=0.001))
    sigma = tf.layers.dense(
        encoder_output,
        z_size,
        activation=tf.nn.softplus,
        name='encoder/sigma',
        kernel_initializer=tf.random_normal_initializer(stddev=0.001))

    return ds.MultivariateNormalDiag(loc=mu, scale_diag=sigma)
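A minimal consumption sketch (names hypothetical): sample a latent from the returned posterior and compute a KL term against a standard-normal prior, using the same `ds` (tf.contrib.distributions) alias as above.

q_z = model.encode(sequence, sequence_length)
z = q_z.sample()
prior = ds.MultivariateNormalDiag(loc=tf.zeros_like(z), scale_diag=tf.ones_like(z))
kl_cost = ds.kl_divergence(q_z, prior)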