Python maximum() usage examples
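The snippets below are collected from open-source TensorFlow 1.x projects. As a quick, illustrative reminder of the op they share (not taken from any of the projects): tf.maximum computes the element-wise maximum of two tensors with broadcasting, which is why it appears throughout these examples for clipping values and for guarding denominators.

import tensorflow as tf

a = tf.constant([0.2, 1.4, -0.3])
clipped = tf.maximum(tf.minimum(a, 1.0), 0.0)          # clip to [0.0, 1.0] -> [0.2, 1.0, 0.0]
safe_den = tf.maximum(tf.constant([0.0, 3.0]), 1e-8)   # guard against division by zero

with tf.Session() as sess:
    print(sess.run([clipped, safe_den]))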

preprocess.py (project: deep_learning_study, author: jowettcz)
def pre_process_data(image,training):
    if training:
        image = tf.random_crop(image,size=[img_size_cropped,img_size_cropped,cifar10.num_channels])

        # Randomly flip and adjust the image colors. The random_* color ops
        # require explicit ranges; the values below are illustrative choices.
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_hue(image, max_delta=0.05)
        image = tf.image.random_contrast(image, lower=0.3, upper=1.0)
        image = tf.image.random_saturation(image, lower=0.0, upper=2.0)
        image = tf.image.random_brightness(image, max_delta=0.2)

        # Clip pixel values to [0.0, 1.0] in case the adjustments overflowed.
        image = tf.minimum(image, 1.0)
        image = tf.maximum(image, 0.0)
    else:
        #for testing image
        image = tf.image.resize_image_with_crop_or_pad(image, img_size_cropped, img_size_cropped)

    return image
impute.py (project: aboleth, author: data61)
def _impute2D(self, X_2D):
        r"""Mean impute a rank 2 tensor."""
        # Fill zeros in for missing data initially
        data_zeroed_missing_tf = X_2D * self.real_val_mask

        # Sum the real values in each column
        col_tot = tf.reduce_sum(data_zeroed_missing_tf, 0)

        # Divide column totals by the number of non-nan values
        num_values_col = tf.reduce_sum(self.real_val_mask, 0)
        num_values_col = tf.maximum(num_values_col,
                                    tf.ones(tf.shape(num_values_col)))
        col_nan_means = tf.div(col_tot, num_values_col)

        # Make a vector of the impute values for each missing point
        imputed_vals = tf.gather(col_nan_means, self.missing_ind[:, 1])

        # Fill the imputed values into the data tensor of zeros
        shape = tf.cast(tf.shape(data_zeroed_missing_tf), dtype=tf.int64)
        missing_imputed = tf.scatter_nd(self.missing_ind, imputed_vals, shape)

        X_with_impute = data_zeroed_missing_tf + missing_imputed

        return X_with_impute
nn_skeleton.py (project: squeezeDet-hand, author: fyhtea)
def filter_prediction(self, boxes, probs, cls_idx):
    """Filter bounding box predictions with probability threshold and
    non-maximum suppression.

    Args:
      boxes: array of [cx, cy, w, h].
      probs: array of probabilities
      cls_idx: array of class indices
    Returns:
      final_boxes: array of filtered bounding boxes.
      final_probs: array of filtered probabilities
      final_cls_idx: array of filtered class indices
    """
    mc = self.mc

    if mc.TOP_N_DETECTION < len(probs) and mc.TOP_N_DETECTION > 0:
      order = probs.argsort()[:-mc.TOP_N_DETECTION-1:-1]
      probs = probs[order]
      boxes = boxes[order]
      cls_idx = cls_idx[order]
    else:
      filtered_idx = np.nonzero(probs>mc.PROB_THRESH)[0]
      probs = probs[filtered_idx]
      boxes = boxes[filtered_idx]
      cls_idx = cls_idx[filtered_idx]

    final_boxes = []
    final_probs = []
    final_cls_idx = []

    for c in range(mc.CLASSES):
      idx_per_class = [i for i in range(len(probs)) if cls_idx[i] == c]
      keep = util.nms(boxes[idx_per_class], probs[idx_per_class], mc.NMS_THRESH)
      for i in range(len(keep)):
        if keep[i]:
          final_boxes.append(boxes[idx_per_class[i]])
          final_probs.append(probs[idx_per_class[i]])
          final_cls_idx.append(c)
    return final_boxes, final_probs, final_cls_idx
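The per-class filtering above delegates to util.nms, which is not included in this snippet. A minimal NumPy sketch of IoU-based non-maximum suppression (a hypothetical stand-in, not the project's implementation, assuming boxes are [cx, cy, w, h] and already sorted by descending probability, and returning a keep list as the caller expects) shows where np.maximum and np.minimum enter the overlap computation:

import numpy as np

def nms(boxes, probs, threshold):
    # boxes: [N, 4] in [cx, cy, w, h]; probs are assumed already sorted in
    # descending order (as in the caller), so they are not re-sorted here.
    boxes = np.asarray(boxes, dtype=np.float32)
    x1 = boxes[:, 0] - boxes[:, 2] / 2.0
    y1 = boxes[:, 1] - boxes[:, 3] / 2.0
    x2 = boxes[:, 0] + boxes[:, 2] / 2.0
    y2 = boxes[:, 1] + boxes[:, 3] / 2.0
    areas = (x2 - x1) * (y2 - y1)

    keep = [True] * len(boxes)
    for i in range(len(boxes)):
        if not keep[i]:
            continue
        for j in range(i + 1, len(boxes)):
            # Intersection width/height via element-wise maximum/minimum of corners.
            iw = np.maximum(0.0, np.minimum(x2[i], x2[j]) - np.maximum(x1[i], x1[j]))
            ih = np.maximum(0.0, np.minimum(y2[i], y2[j]) - np.maximum(y1[i], y1[j]))
            inter = iw * ih
            iou = inter / (areas[i] + areas[j] - inter)
            if iou > threshold:
                keep[j] = False
    return keep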
frame_level_models.py (project: youtube-8m, author: wangheda)
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):

        num_extend = FLAGS.moe_num_extend
        num_layers = num_extend
        lstm_size = FLAGS.lstm_cells
        pool_size=2
        cnn_input = model_input
        num_filters=[256,256,512]
        filter_sizes=[1,2,3]
        features_size = sum(num_filters)
        final_probilities = []
        moe_inputs = []
        for layer in range(num_layers):
            cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%(layer+1))
            cnn_output = tf.nn.relu(cnn_output)
            cnn_multiscale = self.rnn(cnn_output,lstm_size, num_frames,sub_scope="rnn%d"%(layer+1))
            moe_inputs.append(cnn_multiscale)
            final_probility = self.sub_moe(cnn_multiscale,vocab_size,scopename="moe%d"%(layer+1))
            final_probilities.append(final_probility)
            num_t = pool_size*(num_t//pool_size)
            cnn_output = tf.reshape(cnn_output[:,:num_t,:],[-1,num_t//pool_size,pool_size,features_size])
            cnn_input = tf.reduce_max(cnn_output, axis=2)
            num_frames = tf.maximum(num_frames//pool_size,1)

        final_probilities = tf.stack(final_probilities,axis=1)
        moe_inputs = tf.stack(moe_inputs,axis=1)
        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[num_extend, features_size, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
        result = {}
        result["prediction_frames"] = tf.reshape(final_probilities,[-1,vocab_size])
        result["predictions"] = tf.reduce_sum(final_probilities*weight,axis=1)
        return result
frame_level_models.py (project: youtube-8m, author: wangheda)
def create_model(self, model_input, vocab_size, num_frames, distill_labels=None, l2_penalty=1e-8, **unused_params):

        num_extend = FLAGS.moe_num_extend
        num_layers = num_extend
        lstm_size = FLAGS.lstm_cells
        pool_size = 2
        cnn_input = model_input
        cnn_size = FLAGS.cnn_cells
        num_filters = [cnn_size, cnn_size, cnn_size*2]
        filter_sizes = [1, 2, 3]
        features_size = sum(num_filters)
        final_probilities = []
        moe_inputs = []

        for layer in range(num_layers):
            cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%(layer+1))
            cnn_output = tf.nn.relu(cnn_output)
            cnn_multiscale = self.rnn(cnn_output,lstm_size, num_frames,sub_scope="rnn%d"%(layer+1))
            moe_inputs.append(cnn_multiscale)
            final_probility = self.sub_moe(cnn_multiscale,vocab_size,distill_labels=distill_labels, scopename="moe%d"%(layer+1))
            final_probilities.append(final_probility)
            num_t = pool_size*(num_t//pool_size)
            cnn_output = tf.reshape(cnn_output[:,:num_t,:],[-1,num_t//pool_size,pool_size,features_size])
            cnn_input = tf.reduce_max(cnn_output, axis=2)
            num_frames = tf.maximum(num_frames//pool_size,1)

        final_probilities = tf.stack(final_probilities,axis=1)
        moe_inputs = tf.stack(moe_inputs,axis=1)
        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[num_extend, lstm_size, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
        result = {}
        result["prediction_frames"] = tf.reshape(final_probilities,[-1,vocab_size])
        result["predictions"] = tf.reduce_sum(final_probilities*weight,axis=1)
        return result
frame_level_models.py (project: youtube-8m, author: wangheda)
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):

        num_extend = FLAGS.moe_num_extend
        num_layers = num_extend
        lstm_size = FLAGS.lstm_cells
        pool_size = 2
        cnn_input = model_input
        num_filters = [256, 256, 512]
        filter_sizes = [1, 2, 3]
        features_size = sum(num_filters)
        final_probilities = []
        moe_inputs = []
        for layer in range(num_layers):
            cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%(layer+1))
            cnn_output = tf.nn.relu(cnn_output)
            cnn_multiscale = self.rnn_gate(cnn_output, lstm_size, num_frames, sub_scope="rnn%d"%(layer+1))
            moe_inputs.append(cnn_multiscale)
            final_probility = self.sub_moe(cnn_multiscale, vocab_size, scopename="moe%d"%(layer+1))
            final_probilities.append(final_probility)
            num_t = pool_size*(num_t//pool_size)
            cnn_output = tf.reshape(cnn_output[:,:num_t,:],[-1,num_t//pool_size,pool_size,features_size])
            cnn_input = tf.reduce_max(cnn_output, axis=2)
            num_frames = tf.maximum(num_frames//pool_size,1)

        final_probilities = tf.stack(final_probilities, axis=1)
        moe_inputs = tf.stack(moe_inputs, axis=1)
        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[num_extend, features_size, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
        result = {}
        result["prediction_frames"] = tf.reshape(final_probilities,[-1, vocab_size])
        result["predictions"] = tf.reduce_mean(final_probilities, axis=1)
        return result
frame_level_models.py (project: youtube-8m, author: wangheda)
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):

        num_extend = FLAGS.moe_num_extend
        num_layers = num_extend
        lstm_size = FLAGS.lstm_cells
        pool_size = 2
        cnn_input = model_input
        num_filters = [256, 256, 512]
        filter_sizes = [1, 2, 3]
        features_size = sum(num_filters)
        final_probilities = []
        moe_inputs = []
        for layer in range(num_layers):
            cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%(layer+1))
            cnn_output = tf.nn.relu(cnn_output)
            cnn_multiscale = self.rnn_glu(cnn_output, lstm_size, num_frames, sub_scope="rnn%d"%(layer+1))
            moe_inputs.append(cnn_multiscale)
            final_probility = self.sub_moe(cnn_multiscale, vocab_size, scopename="moe%d"%(layer+1))
            final_probilities.append(final_probility)
            num_t = pool_size*(num_t//pool_size)
            cnn_output = tf.reshape(cnn_output[:,:num_t,:],[-1,num_t//pool_size,pool_size,features_size])
            cnn_input = tf.reduce_max(cnn_output, axis=2)
            num_frames = tf.maximum(num_frames//pool_size,1)

        final_probilities = tf.stack(final_probilities, axis=1)
        moe_inputs = tf.stack(moe_inputs, axis=1)
        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[num_extend, features_size, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
        result = {}
        result["prediction_frames"] = tf.reshape(final_probilities,[-1, vocab_size])
        result["predictions"] = tf.reduce_mean(final_probilities, axis=1)
        return result
frame_level_models.py (project: youtube-8m, author: wangheda)
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):

        num_extend = FLAGS.moe_num_extend
        num_layers = num_extend
        lstm_size = FLAGS.lstm_cells
        pool_size=2
        cnn_input = model_input
        num_filters=[256,256,512]
        filter_sizes=[1,2,3]
        features_size = sum(num_filters)
        final_probilities = []
        moe_inputs = []

        for layer in range(num_layers):
            cnn_output, num_t = LstmMultiscaleModel().cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%(layer+1))
            cnn_output = tf.nn.relu(cnn_output)
            cnn_multiscale = LstmMultiscaleModel().rnn(cnn_output,lstm_size, num_frames,sub_scope="rnn%d"%(layer+1))
            moe_inputs.append(cnn_multiscale)
            final_probility = LstmMultiscaleModel().sub_moe(cnn_multiscale,vocab_size,scopename="moe%d"%(layer+1))
            final_probilities.append(final_probility)
            num_t = pool_size*(num_t//pool_size)
            cnn_output = tf.reshape(cnn_output[:,:num_t,:],[-1,num_t//pool_size,pool_size,features_size])
            cnn_input = tf.reduce_max(cnn_output, axis=2)
            num_frames = tf.maximum(num_frames//pool_size,1)

        final_probilities = tf.stack(final_probilities, axis=1)
        moe_inputs = tf.stack(moe_inputs, axis=1)
        weight2d = tf.get_variable("ensemble_weight2d",
                                   shape=[num_extend, features_size, vocab_size],
                                   regularizer=slim.l2_regularizer(1.0e-8))
        weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", tf.stop_gradient(moe_inputs), weight2d), dim=1)
        result = {}
        result["predictions"] = tf.reduce_sum(tf.stop_gradient(final_probilities)*weight, axis=1)
        return result
readers.py (project: youtube-8m, author: wangheda)
def resize_axis(tensor, axis, new_size, fill_value=0):
  """Truncates or pads a tensor to new_size on on a given axis.

  Truncate or extend tensor such that tensor.shape[axis] == new_size. If the
  size increases, the padding will be performed at the end, using fill_value.

  Args:
    tensor: The tensor to be resized.
    axis: An integer representing the dimension to be sliced.
    new_size: An integer or 0d tensor representing the new value for
      tensor.shape[axis].
    fill_value: Value to use to fill any new entries in the tensor. Will be
      cast to the type of tensor.

  Returns:
    The resized tensor.
  """
  tensor = tf.convert_to_tensor(tensor)
  shape = tf.unstack(tf.shape(tensor))

  pad_shape = shape[:]
  pad_shape[axis] = tf.maximum(0, new_size - shape[axis])

  shape[axis] = tf.minimum(shape[axis], new_size)
  shape = tf.stack(shape)

  resized = tf.concat([
      tf.slice(tensor, tf.zeros_like(shape), shape),
      tf.fill(tf.stack(pad_shape), tf.cast(fill_value, tensor.dtype))
  ], axis)

  # Update shape.
  new_shape = tensor.get_shape().as_list()  # A copy is being made.
  new_shape[axis] = new_size
  resized.set_shape(new_shape)
  return resized
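A short usage sketch of the function above (illustrative values, TF 1.x session API): padding grows the axis with fill_value, truncation slices it.

x = tf.constant([[1, 2], [3, 4], [5, 6]])
padded = resize_axis(x, axis=0, new_size=5)     # shape [5, 2]; two rows of zeros appended
truncated = resize_axis(x, axis=0, new_size=2)  # shape [2, 2]; row [5, 6] dropped

with tf.Session() as sess:
    print(sess.run(padded))
    print(sess.run(truncated))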
readers.py (project: youtube-8m, author: wangheda)
def get_video_matrix(self,
                       features,
                       feature_size,
                       max_frames,
                       max_quantized_value,
                       min_quantized_value):
    """Decodes features from an input string and quantizes it.

    Args:
      features: raw feature values
      feature_size: length of each frame feature vector
      max_frames: number of frames (rows) in the output feature_matrix
      max_quantized_value: the maximum of the quantized value.
      min_quantized_value: the minimum of the quantized value.

    Returns:
      feature_matrix: matrix of all frame-features
      num_frames: number of frames in the sequence
    """
    decoded_features = tf.reshape(
        tf.cast(tf.decode_raw(features, tf.uint8), tf.float32),
        [-1, feature_size])

    num_frames = tf.minimum(tf.shape(decoded_features)[0], max_frames)
    feature_matrix = utils.Dequantize(decoded_features,
                                      max_quantized_value,
                                      min_quantized_value)
    feature_matrix = resize_axis(feature_matrix, 0, max_frames)
    return feature_matrix, num_frames
model_utils.py (project: youtube-8m, author: wangheda)
def SampleRandomSequence(model_input, num_frames, num_samples):
  """Samples a random sequence of frames of size num_samples.

  Args:
    model_input: A tensor of size batch_size x max_frames x feature_size
    num_frames: A tensor of size batch_size x 1
    num_samples: A scalar

  Returns:
    `model_input`: A tensor of size batch_size x num_samples x feature_size
  """

  batch_size = tf.shape(model_input)[0]
  frame_index_offset = tf.tile(
      tf.expand_dims(tf.range(num_samples), 0), [batch_size, 1])
  max_start_frame_index = tf.maximum(num_frames - num_samples, 0)
  start_frame_index = tf.cast(
      tf.multiply(
          tf.random_uniform([batch_size, 1]),
          tf.cast(max_start_frame_index + 1, tf.float32)), tf.int32)
  frame_index = tf.minimum(start_frame_index + frame_index_offset,
                           tf.cast(num_frames - 1, tf.int32))
  batch_index = tf.tile(
      tf.expand_dims(tf.range(batch_size), 1), [1, num_samples])
  index = tf.stack([batch_index, frame_index], 2)
  return tf.gather_nd(model_input, index)
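Note the role of tf.maximum here: when a clip has fewer than num_samples frames, num_frames - num_samples would be negative, so tf.maximum(..., 0) forces the random start index to 0, and the later tf.minimum clamps the sampled indices to the last valid frame.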
losses.py (project: youtube-8m, author: wangheda)
def calculate_loss(self, predictions, labels, b=1.0, **unused_params):
    with tf.name_scope("loss_hinge"):
      float_labels = tf.cast(labels, tf.float32)
      all_zeros = tf.zeros(tf.shape(float_labels), dtype=tf.float32)
      all_ones = tf.ones(tf.shape(float_labels), dtype=tf.float32)
      sign_labels = tf.subtract(tf.scalar_mul(2, float_labels), all_ones)
      hinge_loss = tf.maximum(
          all_zeros, tf.scalar_mul(b, all_ones) - sign_labels * predictions)
      return tf.reduce_mean(tf.reduce_sum(hinge_loss, 1))
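With labels mapped to {-1, +1}, tf.maximum implements the standard hinge: for a positive label the per-class term is max(0, b - prediction), for a negative label it is max(0, b + prediction); the terms are summed over classes and averaged over the batch.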
losses.py (project: youtube-8m, author: wangheda)
def calculate_loss(self, predictions, labels, margin=0.2, adaptive=3.0, origin=1.0, **unused_params):
    batch_size = FLAGS.batch_size
    num_classes = FLAGS.num_classes
    with tf.name_scope("loss_hinge"):
      # get sim_neg
      mask = tf.cast(labels, tf.float32)
      reverse_mask = 1.0 - mask
      min_true_pred = tf.reduce_min((predictions - 1.0) * mask, axis=1, keep_dims=True) + 1.0
      mask_wrong = tf.stop_gradient(tf.cast(predictions > (min_true_pred - margin), tf.float32) * reverse_mask)
      # get positive samples
      int_labels = tf.cast(labels, tf.int32)
      sample_labels = tf.unstack(int_labels, num=batch_size, axis=0)
      sample_predictions = tf.unstack(predictions, num=batch_size, axis=0)
      positive_predictions = []
      for sample_label, sample_prediction in zip(sample_labels, sample_predictions):
        indices = tf.where(sample_label > 0)
        expanded_indices = tf.tile(indices[:,0], [num_classes])[:num_classes]
        rand_arrange = tf.random_uniform([num_classes], minval=0, maxval=num_classes, dtype=tf.int32)
        positive_indices = tf.stop_gradient(tf.gather(expanded_indices, rand_arrange))
        positive_prediction = tf.gather(sample_prediction, positive_indices)
        positive_predictions.append(positive_prediction)
      positive_predictions = tf.stack(positive_predictions)
      # hinge_loss
      hinge_loss = tf.maximum(predictions - positive_predictions + margin, 0.0)
      adaptive_loss = hinge_loss * mask_wrong
      adaptive_loss = tf.reduce_mean(tf.reduce_sum(adaptive_loss, axis=1))
      origin_loss = hinge_loss * reverse_mask
      origin_loss = tf.reduce_mean(tf.reduce_sum(origin_loss, axis=1))
      loss = adaptive * adaptive_loss + origin * origin_loss
      return loss
losses.py (project: youtube-8m, author: wangheda)
def calculate_loss(self, predictions, labels, **unused_params):
    with tf.name_scope("loss_softmax"):
      epsilon = 10e-8
      float_labels = tf.cast(labels, tf.float32)
      # l1 normalization (labels are no less than 0)
      label_rowsum = tf.maximum(
          tf.reduce_sum(float_labels, 1, keep_dims=True),
          epsilon)
      norm_float_labels = tf.div(float_labels, label_rowsum)
      softmax_outputs = tf.nn.softmax(predictions)
      softmax_loss = tf.negative(tf.reduce_sum(
          tf.multiply(norm_float_labels, tf.log(softmax_outputs)), 1))
    return tf.reduce_mean(softmax_loss)
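Here tf.maximum serves purely as a numerical guard: clamping each label row sum at epsilon keeps the L1 normalization from dividing by zero for examples that have no positive labels.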
losses.py (project: youtube-8m, author: wangheda)
def calculate_loss(self, predictions, labels, topk=20, **unused_params):
    with tf.name_scope("loss_xent_batch"):
      batch_agreement = FLAGS.batch_agreement
      epsilon = 10e-6
      float_batch_size = float(FLAGS.batch_size)

      topk_predictions, _ = tf.nn.top_k(predictions, k=topk)
      min_topk_predictions = tf.reduce_min(topk_predictions, axis=1, keep_dims=True)
      topk_mask = tf.cast(predictions >= min_topk_predictions, dtype=tf.float32)

      float_labels = tf.cast(labels, tf.float32)
      cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
          1 - float_labels) * tf.log(1 - predictions + epsilon)
      cross_entropy_loss = tf.negative(cross_entropy_loss)

      # minimum positive predictions in topk
      positive_predictions = (predictions * float_labels * topk_mask) + 1.0 - (float_labels * topk_mask)
      min_pp = tf.reduce_min(positive_predictions)

      # maximum negative predictions
      negative_predictions = predictions * (1.0 - float_labels)
      max_np = tf.reduce_max(negative_predictions)

      # 1s that fall under top-k
      false_negatives = tf.cast(predictions < min_topk_predictions, tf.float32) * float_labels
      # 0s that grow over 1s in top-k
      false_positives = tf.cast(predictions > min_pp, tf.float32) * (1.0 - float_labels) * topk_mask

      weight = (false_negatives + false_positives) * batch_agreement + 1.0
      weight = tf.stop_gradient(weight)
      print weight
      return tf.reduce_mean(tf.reduce_sum(weight * cross_entropy_loss, 1))
multiscale_cnn_lstm_model.py (project: youtube-8m, author: wangheda)
def create_model(self, model_input, vocab_size, num_frames, 
                   l2_penalty=1e-8, **unused_params):

    num_layers = FLAGS.multiscale_cnn_lstm_layers
    lstm_size = int(FLAGS.lstm_cells)
    pool_size=2
    num_filters=[256,256,512]
    filter_sizes=[1,2,3]
    features_size = sum(num_filters)

    sub_predictions = []
    cnn_input = model_input

    cnn_max_frames = model_input.get_shape().as_list()[1]

    for layer in range(num_layers):
      cnn_output = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%(layer+1))
      cnn_output_relu = tf.nn.relu(cnn_output)

      lstm_memory = self.rnn(cnn_output_relu, lstm_size, num_frames, sub_scope="rnn%d"%(layer+1))
      sub_prediction = self.moe(lstm_memory, vocab_size, scopename="moe%d"%(layer+1))
      sub_predictions.append(sub_prediction)

      cnn_max_frames //= pool_size
      max_pooled_cnn_output = tf.reduce_max(
          tf.reshape(
              cnn_output_relu[:, :cnn_max_frames*2, :], 
              [-1, cnn_max_frames, pool_size, features_size]
          ), axis=2)

      # for the next cnn layer
      cnn_input = max_pooled_cnn_output
      num_frames = tf.maximum(num_frames/pool_size, 1)

    support_predictions = tf.concat(sub_predictions, axis=1)
    predictions = tf.add_n(sub_predictions) / len(sub_predictions)

    return {"predictions": predictions, 
            "support_predictions": support_predictions}
cnn_lstm_memory_normalization_model.py (project: youtube-8m, author: wangheda)
def layer_normalize(self, input_raw, epsilon=1e-8):
    feature_dim = len(input_raw.get_shape()) - 1
    mean_input = tf.reduce_mean(input_raw, axis=feature_dim, keep_dims=True)
    std_input = tf.sqrt(tf.reduce_mean(tf.square(input_raw-mean_input), axis=feature_dim, keep_dims=True))
    std_input = tf.maximum(std_input, epsilon)
    output = (input_raw - mean_input) / std_input
    return output
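In this layer normalization, tf.maximum clamps the per-feature standard deviation at epsilon, so constant inputs do not cause a division by zero.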
lstm_memory_normalization_model.py (project: youtube-8m, author: wangheda)
def layer_normalize(self, input_raw, epsilon=1e-8):
    feature_dim = len(input_raw.get_shape()) - 1
    mean_input = tf.reduce_mean(input_raw, axis=feature_dim, keep_dims=True)
    std_input = tf.sqrt(tf.reduce_mean(tf.square(input_raw-mean_input), axis=feature_dim, keep_dims=True))
    std_input = tf.maximum(std_input, epsilon)
    output = (input_raw - mean_input) / std_input
    return output
half_augmenter.py (project: youtube-8m, author: wangheda)
def augment(self, model_input_raw, num_frames, labels_batch, **unused_params):

    assert FLAGS.frame_features, \
        "HalfAugmenter only works with frame features"
    print("using HalfAugmenter")

    feature_dim = len(model_input_raw.get_shape()) - 1
    frame_dim = len(model_input_raw.get_shape()) - 2
    max_frame = model_input_raw.get_shape().as_list()[frame_dim]
    seg_length = max(int(max_frame / 2), 1)
    seg_num_frames = tf.maximum(num_frames / 2, 1)

    seg_inputs = []
    seg_frames = []
    seg_labels = []

    seg_inputs.append(model_input_raw)
    seg_frames.append(num_frames)
    seg_labels.append(labels_batch)

    for i in xrange(2):
      begin_frames = tf.reshape(seg_num_frames*i, [-1,1])
      frames_index = tf.reshape(tf.range(seg_length), [1,seg_length])
      frames_index = begin_frames + frames_index
      batch_size = tf.shape(model_input_raw)[0]
      batch_index = tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, seg_length])
      index = tf.stack([batch_index, tf.cast(frames_index,dtype=tf.int32)], 2)
      seg_input = tf.gather_nd(model_input_raw, index)
      seg_input = tf.pad(seg_input, paddings=[[0,0],[0, max_frame-seg_length],[0,0]])
      seg_input = seg_input * tf.expand_dims(tf.sequence_mask(seg_num_frames, maxlen=max_frame, dtype=tf.float32), axis=2)
      seg_inputs.append(seg_input)
      seg_frames.append(seg_num_frames)
      seg_labels.append(labels_batch)

    new_input_raw = tf.concat(seg_inputs, axis=0)
    new_num_frames = tf.concat(seg_frames, axis=0)
    new_labels_batch = tf.concat(seg_labels, axis=0)
    return new_input_raw, new_labels_batch, new_num_frames
half_video_augmenter.py (project: youtube-8m, author: wangheda)
def frame_augment(self, model_input_raw, num_frames, labels_batch, **unused_params):
    feature_dim = len(model_input_raw.get_shape()) - 1
    frame_dim = len(model_input_raw.get_shape()) - 2
    max_frame = model_input_raw.get_shape().as_list()[frame_dim]
    seg_length = max(int(max_frame / 2), 1)
    seg_num_frames = tf.maximum(num_frames / 2, 1)

    seg_inputs = []
    seg_frames = []
    seg_labels = []

    seg_inputs.append(model_input_raw)
    seg_frames.append(num_frames)
    seg_labels.append(labels_batch)

    for i in xrange(2):
      begin_frames = tf.reshape(seg_num_frames*i, [-1,1])
      frames_index = tf.reshape(tf.range(seg_length), [1,seg_length])
      frames_index = begin_frames + frames_index
      batch_size = tf.shape(model_input_raw)[0]
      batch_index = tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, seg_length])
      index = tf.stack([batch_index, tf.cast(frames_index,dtype=tf.int32)], 2)
      seg_input = tf.gather_nd(model_input_raw, index)
      seg_input = tf.pad(seg_input, paddings=[[0,0],[0, max_frame-seg_length],[0,0]])
      seg_input = seg_input * tf.expand_dims(tf.sequence_mask(seg_num_frames, maxlen=max_frame, dtype=tf.float32), axis=2)
      seg_inputs.append(seg_input)
      seg_frames.append(seg_num_frames)
      seg_labels.append(labels_batch)

    new_input_raw = tf.concat(seg_inputs, axis=0)
    new_num_frames = tf.concat(seg_frames, axis=0)
    new_labels_batch = tf.concat(seg_labels, axis=0)
    return new_input_raw, new_labels_batch, new_num_frames

