def _parse_example_proto(example_serialized):
    """Parse one serialized Example into (image, caption token ids, COCO id).

    Steps performed:
      1. Parse the record with a fixed feature map.
      2. Decode the JPEG bytes into a 3-channel image and convert it to
         float32.
      3. Pick one of the stored captions at random and parse it as a CSV
         record of MAX_SEQ_LEN fields.

    NOTE: the caption length is NOT computed or returned here.

    Args:
        example_serialized: one serialized Example containing the features
            'image/encoded' (string), 'image/coco-id' (int64) and
            'caption' (variable-length list of strings).

    Returns:
        A tuple ``(image, caption_tids, cocoid)`` where ``image`` is the
        decoded float32 image, ``caption_tids`` is the tensor obtained by
        stacking the MAX_SEQ_LEN parsed caption fields, and ``cocoid`` is
        the 'image/coco-id' feature.
    """
    feature_map = {
        'image/encoded': tf.FixedLenFeature([], dtype=tf.string),
        'image/coco-id': tf.FixedLenFeature([], dtype=tf.int64),
        'caption': tf.VarLenFeature(dtype=tf.string),
        # 'image/path' is currently not parsed; re-enable to also return it:
        # 'image/path': tf.FixedLenFeature([], dtype=tf.string),
    }
    features = tf.parse_single_example(example_serialized, feature_map)
    cocoid = features['image/coco-id']

    # try_recover_truncated: COCO_train2014_000000167126.jpg was corrupted
    # when the records were written. The file was replaced on disk, but the
    # records were not re-encoded, so decoding accepts the truncated data and
    # yields only part of that image instead of failing.
    image = tf.image.decode_jpeg(
        features['image/encoded'],
        channels=3,
        try_recover_truncated=True)
    # Rescale integer pixel values to floats in [0, 1].
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    # Densify the variable-length caption list, then shuffle it and take the
    # first entry, i.e. select one caption uniformly at random.
    caption = tf.sparse_tensor_to_dense(features['caption'], default_value=".")
    caption = tf.random_shuffle(caption)[0]

    # Each caption is parsed as a CSV line with MAX_SEQ_LEN columns whose
    # empty fields default to PAD (presumably captions were stored as
    # comma-separated token ids -- confirm against the record writer).
    record_defaults = [[PAD]] * MAX_SEQ_LEN
    caption_tids = tf.decode_csv(caption, record_defaults)

    # tf.pack was renamed to tf.stack and removed in TensorFlow 1.0; prefer
    # tf.stack when it exists and fall back to tf.pack on older releases.
    stack_fn = getattr(tf, 'stack', None) or tf.pack
    caption_tids = stack_fn(caption_tids)
    return image, caption_tids, cocoid
评论列表
文章目录