def tf_Examples(data_path, num_epochs=None):
    """Generate deserialized tf.Examples from binary data files.

    Binary data format: <length><blob>. <length> is an 8-byte integer
    (struct format 'q') giving the byte size of <blob>. <blob> is a
    serialized tf.Example proto. The tf.Example contains the tokenized
    article text and summary.

    This is a generator: nothing is read (and no error is raised) until
    the first item is requested.

    Args:
        data_path: glob pattern matching the tf.Example data files.
        num_epochs: Number of times to go through the data. None means
            infinite.

    Yields:
        Deserialized tf.Example.

        The pattern is re-expanded and the resulting file list reshuffled
        at the start of every epoch, so if multiple files match they are
        accessed in a random order.

    Raises:
        AssertionError: if `data_path` matches no files.
        struct.error: if a length prefix or a blob is truncated.
    """
    epoch = 0
    while num_epochs is None or epoch < num_epochs:
        filelist = glob.glob(data_path)
        # Raised explicitly rather than via `assert` so the check survives
        # `python -O`; otherwise an empty match with num_epochs=None would
        # loop forever without yielding anything.
        if not filelist:
            raise AssertionError('Empty filelist.')
        shuffle(filelist)
        for f in filelist:
            # `with` guarantees the handle is closed when the file is
            # exhausted, on error, or when the generator is closed early.
            with open(f, 'rb') as reader:
                while True:
                    len_bytes = reader.read(8)
                    if not len_bytes:
                        break  # clean end of file
                    str_len = struct.unpack('q', len_bytes)[0]
                    # Unpacking with an exact-size format doubles as a
                    # truncation check: a short read raises struct.error.
                    example_str = struct.unpack(
                        '%ds' % str_len, reader.read(str_len))[0]
                    yield example_pb2.Example.FromString(example_str)
        epoch += 1
dataset.py 文件源码
python
阅读 26
收藏 0
点赞 0
评论 0
评论列表
文章目录