coco_inputs.py 文件源码-python代码片段

def _parse_example_proto(example_serialized):
  # parse record
  # decode jpeg
  # random select one caption, convert it into integers
  # compute the length of the caption
  feature_map = {
      'image/encoded': tf.FixedLenFeature([], dtype=tf.string),
      'image/coco-id': tf.FixedLenFeature([], dtype=tf.int64),
      'caption': tf.VarLenFeature(dtype=tf.string),
      # 'image/path': tf.FixedLenFeature([], dtype=tf.string),
  }

  features = tf.parse_single_example(example_serialized, feature_map)

  cocoid = features['image/coco-id']
  image = tf.image.decode_jpeg(
      features['image/encoded'],
      channels=3,
      try_recover_truncated=True)
  # the image COCO_train2014_000000167126.jpg was corrupted
  # replaced that image in my train2014/ directory
  # but do not want to re encode everything, so just try_recover_truncated
  # which is just part of the image

  # [0,255) --> [0,1)
  image = tf.image.convert_image_dtype(image, dtype=tf.float32)

  #image_path = features['image/path']

  caption = tf.sparse_tensor_to_dense(features['caption'], default_value=".")
  caption = tf.random_shuffle(caption)[0]
  record_defaults = [[PAD]] * MAX_SEQ_LEN
  caption_tids = tf.decode_csv(caption, record_defaults)
  caption_tids = tf.pack(caption_tids)

  return image, caption_tids, cocoid #, image_path