def pre_process_data(image, training):
    if training:
        # Randomly crop the image and apply random color distortions.
        image = tf.random_crop(image, size=[img_size_cropped, img_size_cropped, cifar10.num_channels])
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_hue(image, max_delta=0.05)
        image = tf.image.random_contrast(image, lower=0.3, upper=1.0)
        image = tf.image.random_saturation(image, lower=0.0, upper=2.0)
        image = tf.image.random_brightness(image, max_delta=0.2)
        # Clip pixel values back into the valid [0, 1] range.
        image = tf.minimum(image, 1.0)
        image = tf.maximum(image, 0.0)
    else:
        # For testing, deterministically crop or pad the image to the target size.
        image = tf.image.resize_image_with_crop_or_pad(image, img_size_cropped, img_size_cropped)
    return image
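A minimal usage sketch for the function above (TF 1.x graph mode), assuming img_size_cropped and cifar10.num_channels are defined elsewhere in the project and that images are float32 values in [0, 1]; the batch shape is illustrative only.

import tensorflow as tf

# Hypothetical batch of CIFAR-10 style images.
images = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
# Apply the per-image pre-processing across the whole batch.
augmented = tf.map_fn(lambda img: pre_process_data(img, training=True), images)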
Example source code for Python tf.maximum()
def _impute2D(self, X_2D):
    r"""Mean impute a rank 2 tensor."""
    # Fill zeros in for missing data initially
    data_zeroed_missing_tf = X_2D * self.real_val_mask

    # Sum the real values in each column
    col_tot = tf.reduce_sum(data_zeroed_missing_tf, 0)

    # Divide column totals by the number of non-nan values
    num_values_col = tf.reduce_sum(self.real_val_mask, 0)
    num_values_col = tf.maximum(num_values_col,
                                tf.ones(tf.shape(num_values_col)))
    col_nan_means = tf.div(col_tot, num_values_col)

    # Make a vector of the impute values for each missing point
    imputed_vals = tf.gather(col_nan_means, self.missing_ind[:, 1])

    # Fill the imputed values into the data tensor of zeros
    shape = tf.cast(tf.shape(data_zeroed_missing_tf), dtype=tf.int64)
    missing_imputed = tf.scatter_nd(self.missing_ind, imputed_vals, shape)
    X_with_impute = data_zeroed_missing_tf + missing_imputed

    return X_with_impute
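A standalone sketch of the same column-mean imputation on a tiny matrix (TF 1.x); the mask and index arrays below play the roles of self.real_val_mask and self.missing_ind and are purely illustrative.

import numpy as np
import tensorflow as tf

X = np.array([[1.0, 0.0], [3.0, 4.0]], dtype=np.float32)     # 0.0 marks the missing cell
mask = np.array([[1.0, 0.0], [1.0, 1.0]], dtype=np.float32)  # 1 = observed, 0 = missing
missing_ind = np.argwhere(mask == 0)                          # [[0, 1]]

col_means = tf.reduce_sum(X * mask, 0) / tf.maximum(tf.reduce_sum(mask, 0), 1.0)
imputed_vals = tf.gather(col_means, missing_ind[:, 1])
shape = tf.cast(tf.shape(X), tf.int64)
filled = X * mask + tf.scatter_nd(missing_ind, imputed_vals, shape)

with tf.Session() as sess:
    print(sess.run(filled))   # [[1. 4.] [3. 4.]]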
def filter_prediction(self, boxes, probs, cls_idx):
    """Filter bounding box predictions with probability threshold and
    non-maximum suppression.

    Args:
      boxes: array of [cx, cy, w, h].
      probs: array of probabilities
      cls_idx: array of class indices
    Returns:
      final_boxes: array of filtered bounding boxes.
      final_probs: array of filtered probabilities
      final_cls_idx: array of filtered class indices
    """
    mc = self.mc

    if mc.TOP_N_DETECTION < len(probs) and mc.TOP_N_DETECTION > 0:
        order = probs.argsort()[:-mc.TOP_N_DETECTION-1:-1]
        probs = probs[order]
        boxes = boxes[order]
        cls_idx = cls_idx[order]
    else:
        filtered_idx = np.nonzero(probs > mc.PROB_THRESH)[0]
        probs = probs[filtered_idx]
        boxes = boxes[filtered_idx]
        cls_idx = cls_idx[filtered_idx]

    final_boxes = []
    final_probs = []
    final_cls_idx = []

    for c in range(mc.CLASSES):
        idx_per_class = [i for i in range(len(probs)) if cls_idx[i] == c]
        keep = util.nms(boxes[idx_per_class], probs[idx_per_class], mc.NMS_THRESH)
        for i in range(len(keep)):
            if keep[i]:
                final_boxes.append(boxes[idx_per_class[i]])
                final_probs.append(probs[idx_per_class[i]])
                final_cls_idx.append(c)
    return final_boxes, final_probs, final_cls_idx
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d" % (layer+1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = self.rnn(cnn_output, lstm_size, num_frames, sub_scope="rnn%d" % (layer+1))
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size, scopename="moe%d" % (layer+1))
        final_probilities.append(final_probility)
        num_t = pool_size * (num_t // pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :], [-1, num_t // pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames // pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_sum(final_probilities * weight, axis=1)
    return result
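A small sketch of the temporal max-pooling step used inside the layer loop above: the frame axis is truncated to a multiple of pool_size, reshaped so consecutive frames are grouped, and reduced with tf.reduce_max over the pooling axis, halving the number of frames for the next layer. Shapes here are illustrative assumptions.

import tensorflow as tf

pool_size = 2
frames = tf.random_uniform([4, 7, 16])           # batch=4, T=7 frames, 16 features
num_t = pool_size * (7 // pool_size)             # truncate T to 6
pooled = tf.reduce_max(
    tf.reshape(frames[:, :num_t, :], [-1, num_t // pool_size, pool_size, 16]),
    axis=2)                                      # shape [4, 3, 16]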
def create_model(self, model_input, vocab_size, num_frames, distill_labels=None, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    cnn_size = FLAGS.cnn_cells
    num_filters = [cnn_size, cnn_size, cnn_size * 2]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d" % (layer+1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = self.rnn(cnn_output, lstm_size, num_frames, sub_scope="rnn%d" % (layer+1))
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size, distill_labels=distill_labels, scopename="moe%d" % (layer+1))
        final_probilities.append(final_probility)
        num_t = pool_size * (num_t // pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :], [-1, num_t // pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames // pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, lstm_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_sum(final_probilities * weight, axis=1)
    return result
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d" % (layer+1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = self.rnn_gate(cnn_output, lstm_size, num_frames, sub_scope="rnn%d" % (layer+1))
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size, scopename="moe%d" % (layer+1))
        final_probilities.append(final_probility)
        num_t = pool_size * (num_t // pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :], [-1, num_t // pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames // pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_mean(final_probilities, axis=1)
    return result
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d" % (layer+1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = self.rnn_glu(cnn_output, lstm_size, num_frames, sub_scope="rnn%d" % (layer+1))
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size, scopename="moe%d" % (layer+1))
        final_probilities.append(final_probility)
        num_t = pool_size * (num_t // pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :], [-1, num_t // pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames // pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_mean(final_probilities, axis=1)
    return result
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = LstmMultiscaleModel().cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d" % (layer+1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = LstmMultiscaleModel().rnn(cnn_output, lstm_size, num_frames, sub_scope="rnn%d" % (layer+1))
        moe_inputs.append(cnn_multiscale)
        final_probility = LstmMultiscaleModel().sub_moe(cnn_multiscale, vocab_size, scopename="moe%d" % (layer+1))
        final_probilities.append(final_probility)
        num_t = pool_size * (num_t // pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :], [-1, num_t // pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames // pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", tf.stop_gradient(moe_inputs), weight2d), dim=1)
    result = {}
    result["predictions"] = tf.reduce_sum(tf.stop_gradient(final_probilities) * weight, axis=1)
    return result
def resize_axis(tensor, axis, new_size, fill_value=0):
    """Truncates or pads a tensor to new_size on a given axis.

    Truncate or extend tensor such that tensor.shape[axis] == new_size. If the
    size increases, the padding will be performed at the end, using fill_value.

    Args:
      tensor: The tensor to be resized.
      axis: An integer representing the dimension to be sliced.
      new_size: An integer or 0d tensor representing the new value for
        tensor.shape[axis].
      fill_value: Value to use to fill any new entries in the tensor. Will be
        cast to the type of tensor.

    Returns:
      The resized tensor.
    """
    tensor = tf.convert_to_tensor(tensor)
    shape = tf.unstack(tf.shape(tensor))

    pad_shape = shape[:]
    pad_shape[axis] = tf.maximum(0, new_size - shape[axis])

    shape[axis] = tf.minimum(shape[axis], new_size)
    shape = tf.stack(shape)

    resized = tf.concat([
        tf.slice(tensor, tf.zeros_like(shape), shape),
        tf.fill(tf.stack(pad_shape), tf.cast(fill_value, tensor.dtype))
    ], axis)

    # Update shape.
    new_shape = tensor.get_shape().as_list()  # A copy is being made.
    new_shape[axis] = new_size
    resized.set_shape(new_shape)
    return resized
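A hedged usage sketch for resize_axis with illustrative values: padding grows the axis at the end with fill_value, while truncation keeps the leading slice.

import tensorflow as tf

x = tf.constant([[1.0, 2.0, 3.0],
                 [4.0, 5.0, 6.0]])
padded = resize_axis(x, axis=0, new_size=4, fill_value=-1)   # shape [4, 3], last two rows are -1
truncated = resize_axis(x, axis=0, new_size=1)               # shape [1, 3], keeps the first row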
def get_video_matrix(self,
                     features,
                     feature_size,
                     max_frames,
                     max_quantized_value,
                     min_quantized_value):
    """Decodes features from an input string and dequantizes them.

    Args:
      features: raw feature values
      feature_size: length of each frame feature vector
      max_frames: number of frames (rows) in the output feature_matrix
      max_quantized_value: the maximum of the quantized value.
      min_quantized_value: the minimum of the quantized value.
    Returns:
      feature_matrix: matrix of all frame-features
      num_frames: number of frames in the sequence
    """
    decoded_features = tf.reshape(
        tf.cast(tf.decode_raw(features, tf.uint8), tf.float32),
        [-1, feature_size])
    num_frames = tf.minimum(tf.shape(decoded_features)[0], max_frames)
    feature_matrix = utils.Dequantize(decoded_features,
                                      max_quantized_value,
                                      min_quantized_value)
    feature_matrix = resize_axis(feature_matrix, 0, max_frames)
    return feature_matrix, num_frames
def SampleRandomSequence(model_input, num_frames, num_samples):
    """Samples a random sequence of frames of size num_samples.

    Args:
      model_input: A tensor of size batch_size x max_frames x feature_size
      num_frames: A tensor of size batch_size x 1
      num_samples: A scalar
    Returns:
      `model_input`: A tensor of size batch_size x num_samples x feature_size
    """
    batch_size = tf.shape(model_input)[0]
    frame_index_offset = tf.tile(
        tf.expand_dims(tf.range(num_samples), 0), [batch_size, 1])
    max_start_frame_index = tf.maximum(num_frames - num_samples, 0)
    start_frame_index = tf.cast(
        tf.multiply(
            tf.random_uniform([batch_size, 1]),
            tf.cast(max_start_frame_index + 1, tf.float32)), tf.int32)
    frame_index = tf.minimum(start_frame_index + frame_index_offset,
                             tf.cast(num_frames - 1, tf.int32))
    batch_index = tf.tile(
        tf.expand_dims(tf.range(batch_size), 1), [1, num_samples])
    index = tf.stack([batch_index, frame_index], 2)
    return tf.gather_nd(model_input, index)
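A usage sketch under assumed shapes: as the docstring describes, model_input is batch_size x max_frames x feature_size and num_frames is batch_size x 1; values here are illustrative.

import tensorflow as tf

model_input = tf.random_uniform([3, 100, 128])      # batch=3, max_frames=100, feature_size=128
num_frames = tf.constant([[100], [40], [7]])        # true length of each example
sampled = SampleRandomSequence(model_input, num_frames, num_samples=5)  # shape [3, 5, 128]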
def calculate_loss(self, predictions, labels, b=1.0, **unused_params):
    with tf.name_scope("loss_hinge"):
        float_labels = tf.cast(labels, tf.float32)
        all_zeros = tf.zeros(tf.shape(float_labels), dtype=tf.float32)
        all_ones = tf.ones(tf.shape(float_labels), dtype=tf.float32)
        sign_labels = tf.subtract(tf.scalar_mul(2, float_labels), all_ones)
        hinge_loss = tf.maximum(
            all_zeros, tf.scalar_mul(b, all_ones) - sign_labels * predictions)
        return tf.reduce_mean(tf.reduce_sum(hinge_loss, 1))
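A tiny numeric check of the hinge loss above with b = 1.0: labels in {0, 1} are mapped to signs {-1, +1}, and each term is max(0, b - sign * prediction). The values below are illustrative only.

import tensorflow as tf

labels = tf.constant([[1.0, 0.0]])
predictions = tf.constant([[0.8, 0.3]])
signs = 2.0 * labels - 1.0                                 # [[ 1., -1.]]
per_term = tf.maximum(0.0, 1.0 - signs * predictions)      # [[0.2, 1.3]]
loss = tf.reduce_mean(tf.reduce_sum(per_term, 1))          # 1.5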
def calculate_loss(self, predictions, labels, margin=0.2, adaptive=3.0, origin=1.0, **unused_params):
    batch_size = FLAGS.batch_size
    num_classes = FLAGS.num_classes
    with tf.name_scope("loss_hinge"):
        # get sim_neg
        mask = tf.cast(labels, tf.float32)
        reverse_mask = 1.0 - mask
        min_true_pred = tf.reduce_min((predictions - 1.0) * mask, axis=1, keep_dims=True) + 1.0
        mask_wrong = tf.stop_gradient(tf.cast(predictions > (min_true_pred - margin), tf.float32) * reverse_mask)

        # get positive samples
        int_labels = tf.cast(labels, tf.int32)
        sample_labels = tf.unstack(int_labels, num=batch_size, axis=0)
        sample_predictions = tf.unstack(predictions, num=batch_size, axis=0)
        positive_predictions = []
        for sample_label, sample_prediction in zip(sample_labels, sample_predictions):
            indices = tf.where(sample_label > 0)
            expanded_indices = tf.tile(indices[:, 0], [num_classes])[:num_classes]
            rand_arrange = tf.random_uniform([num_classes], minval=0, maxval=num_classes, dtype=tf.int32)
            positive_indices = tf.stop_gradient(tf.gather(expanded_indices, rand_arrange))
            positive_prediction = tf.gather(sample_prediction, positive_indices)
            positive_predictions.append(positive_prediction)
        positive_predictions = tf.stack(positive_predictions)

        # hinge_loss
        hinge_loss = tf.maximum(predictions - positive_predictions + margin, 0.0)
        adaptive_loss = hinge_loss * mask_wrong
        adaptive_loss = tf.reduce_mean(tf.reduce_sum(adaptive_loss, axis=1))
        origin_loss = hinge_loss * reverse_mask
        origin_loss = tf.reduce_mean(tf.reduce_sum(origin_loss, axis=1))
        loss = adaptive * adaptive_loss + origin * origin_loss
        return loss
def calculate_loss(self, predictions, labels, **unused_params):
    with tf.name_scope("loss_softmax"):
        epsilon = 10e-8
        float_labels = tf.cast(labels, tf.float32)
        # l1 normalization (labels are no less than 0)
        label_rowsum = tf.maximum(
            tf.reduce_sum(float_labels, 1, keep_dims=True),
            epsilon)
        norm_float_labels = tf.div(float_labels, label_rowsum)
        softmax_outputs = tf.nn.softmax(predictions)
        softmax_loss = tf.negative(tf.reduce_sum(
            tf.multiply(norm_float_labels, tf.log(softmax_outputs)), 1))
        return tf.reduce_mean(softmax_loss)
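A brief sketch of the label normalization performed above: a multi-label row such as [1, 0, 1, 1] becomes the target distribution [1/3, 0, 1/3, 1/3], and the epsilon in the row-sum guard keeps all-zero label rows from dividing by zero. Values are illustrative.

import tensorflow as tf

labels = tf.constant([[1.0, 0.0, 1.0, 1.0]])
row_sum = tf.maximum(tf.reduce_sum(labels, 1, keep_dims=True), 1e-7)
targets = labels / row_sum    # approximately [[0.333, 0., 0.333, 0.333]]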
def calculate_loss(self, predictions, labels, topk=20, **unused_params):
    with tf.name_scope("loss_xent_batch"):
        batch_agreement = FLAGS.batch_agreement
        epsilon = 10e-6
        float_batch_size = float(FLAGS.batch_size)

        topk_predictions, _ = tf.nn.top_k(predictions, k=topk)
        min_topk_predictions = tf.reduce_min(topk_predictions, axis=1, keep_dims=True)
        topk_mask = tf.cast(predictions >= min_topk_predictions, dtype=tf.float32)

        float_labels = tf.cast(labels, tf.float32)
        cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
            1 - float_labels) * tf.log(1 - predictions + epsilon)
        cross_entropy_loss = tf.negative(cross_entropy_loss)

        # minimum positive predictions in topk
        positive_predictions = (predictions * float_labels * topk_mask) + 1.0 - (float_labels * topk_mask)
        min_pp = tf.reduce_min(positive_predictions)
        # maximum negative predictions
        negative_predictions = predictions * (1.0 - float_labels)
        max_np = tf.reduce_max(negative_predictions)

        # 1s that fall under top-k
        false_negatives = tf.cast(predictions < min_topk_predictions, tf.float32) * float_labels
        # 0s that grow over 1s in top-k
        false_positives = tf.cast(predictions > min_pp, tf.float32) * (1.0 - float_labels) * topk_mask

        weight = (false_negatives + false_positives) * batch_agreement + 1.0
        weight = tf.stop_gradient(weight)
        print(weight)
        return tf.reduce_mean(tf.reduce_sum(weight * cross_entropy_loss, 1))
def create_model(self, model_input, vocab_size, num_frames,
                 l2_penalty=1e-8, **unused_params):
    num_layers = FLAGS.multiscale_cnn_lstm_layers
    lstm_size = int(FLAGS.lstm_cells)
    pool_size = 2
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)

    sub_predictions = []
    cnn_input = model_input

    cnn_max_frames = model_input.get_shape().as_list()[1]

    for layer in range(num_layers):
        cnn_output = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d" % (layer+1))
        cnn_output_relu = tf.nn.relu(cnn_output)

        lstm_memory = self.rnn(cnn_output_relu, lstm_size, num_frames, sub_scope="rnn%d" % (layer+1))
        sub_prediction = self.moe(lstm_memory, vocab_size, scopename="moe%d" % (layer+1))
        sub_predictions.append(sub_prediction)

        cnn_max_frames //= pool_size
        max_pooled_cnn_output = tf.reduce_max(
            tf.reshape(
                cnn_output_relu[:, :cnn_max_frames * 2, :],
                [-1, cnn_max_frames, pool_size, features_size]
            ), axis=2)

        # for the next cnn layer
        cnn_input = max_pooled_cnn_output
        num_frames = tf.maximum(num_frames // pool_size, 1)

    support_predictions = tf.concat(sub_predictions, axis=1)
    predictions = tf.add_n(sub_predictions) / len(sub_predictions)
    return {"predictions": predictions,
            "support_predictions": support_predictions}
cnn_lstm_memory_normalization_model.py (project: youtube-8m, author: wangheda)
def layer_normalize(self, input_raw, epsilon=1e-8):
    feature_dim = len(input_raw.get_shape()) - 1
    mean_input = tf.reduce_mean(input_raw, axis=feature_dim, keep_dims=True)
    std_input = tf.sqrt(tf.reduce_mean(tf.square(input_raw - mean_input), axis=feature_dim, keep_dims=True))
    std_input = tf.maximum(std_input, epsilon)
    output = (input_raw - mean_input) / std_input
    return output
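A quick numeric check of the normalization above on one row; since the method body does not touch self, the same ops are repeated standalone here with illustrative values.

import tensorflow as tf

x = tf.constant([[1.0, 2.0, 3.0]])
mean = tf.reduce_mean(x, axis=1, keep_dims=True)                              # [[2.]]
std = tf.sqrt(tf.reduce_mean(tf.square(x - mean), axis=1, keep_dims=True))    # [[0.8165]]
normalized = (x - mean) / tf.maximum(std, 1e-8)     # approximately [[-1.2247, 0., 1.2247]]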
def augment(self, model_input_raw, num_frames, labels_batch, **unused_params):
    assert FLAGS.frame_features, "HalfAugmenter only works with frame features"
    print("using HalfAugmenter")

    feature_dim = len(model_input_raw.get_shape()) - 1
    frame_dim = len(model_input_raw.get_shape()) - 2
    max_frame = model_input_raw.get_shape().as_list()[frame_dim]

    seg_length = max(int(max_frame / 2), 1)
    seg_num_frames = tf.maximum(num_frames // 2, 1)

    seg_inputs = []
    seg_frames = []
    seg_labels = []
    seg_inputs.append(model_input_raw)
    seg_frames.append(num_frames)
    seg_labels.append(labels_batch)

    for i in range(2):
        begin_frames = tf.reshape(seg_num_frames * i, [-1, 1])
        frames_index = tf.reshape(tf.range(seg_length), [1, seg_length])
        frames_index = begin_frames + frames_index
        batch_size = tf.shape(model_input_raw)[0]
        batch_index = tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, seg_length])
        index = tf.stack([batch_index, tf.cast(frames_index, dtype=tf.int32)], 2)
        seg_input = tf.gather_nd(model_input_raw, index)
        seg_input = tf.pad(seg_input, paddings=[[0, 0], [0, max_frame - seg_length], [0, 0]])
        seg_input = seg_input * tf.expand_dims(tf.sequence_mask(seg_num_frames, maxlen=max_frame, dtype=tf.float32), axis=2)
        seg_inputs.append(seg_input)
        seg_frames.append(seg_num_frames)
        seg_labels.append(labels_batch)

    new_input_raw = tf.concat(seg_inputs, axis=0)
    new_num_frames = tf.concat(seg_frames, axis=0)
    new_labels_batch = tf.concat(seg_labels, axis=0)
    return new_input_raw, new_labels_batch, new_num_frames
def frame_augment(self, model_input_raw, num_frames, labels_batch, **unused_params):
    feature_dim = len(model_input_raw.get_shape()) - 1
    frame_dim = len(model_input_raw.get_shape()) - 2
    max_frame = model_input_raw.get_shape().as_list()[frame_dim]

    seg_length = max(int(max_frame / 2), 1)
    seg_num_frames = tf.maximum(num_frames // 2, 1)

    seg_inputs = []
    seg_frames = []
    seg_labels = []
    seg_inputs.append(model_input_raw)
    seg_frames.append(num_frames)
    seg_labels.append(labels_batch)

    for i in range(2):
        begin_frames = tf.reshape(seg_num_frames * i, [-1, 1])
        frames_index = tf.reshape(tf.range(seg_length), [1, seg_length])
        frames_index = begin_frames + frames_index
        batch_size = tf.shape(model_input_raw)[0]
        batch_index = tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, seg_length])
        index = tf.stack([batch_index, tf.cast(frames_index, dtype=tf.int32)], 2)
        seg_input = tf.gather_nd(model_input_raw, index)
        seg_input = tf.pad(seg_input, paddings=[[0, 0], [0, max_frame - seg_length], [0, 0]])
        seg_input = seg_input * tf.expand_dims(tf.sequence_mask(seg_num_frames, maxlen=max_frame, dtype=tf.float32), axis=2)
        seg_inputs.append(seg_input)
        seg_frames.append(seg_num_frames)
        seg_labels.append(labels_batch)

    new_input_raw = tf.concat(seg_inputs, axis=0)
    new_num_frames = tf.concat(seg_frames, axis=0)
    new_labels_batch = tf.concat(seg_labels, axis=0)
    return new_input_raw, new_labels_batch, new_num_frames