def question_batches(data_file):
    """Iterate over a dataset one question at a time.

    Each yielded batch is built from the rows listed for a single question
    in ``data/labels/q<i>``: column 0 of that array holds the row indices
    used to select from ``data/questions`` and ``data/sentences``, and
    column 1 holds the corresponding labels.

    :data_file: a HDF5 file object holding the dataset
    :returns: a generator of DataSet namedtuples of arrays
        (questions, sentences, labels), one per question.
    """
    # np.asscalar was removed in NumPy 1.23; ndarray.item() is the
    # equivalent it used to wrap. int() guarantees a value range() accepts.
    count_raw = data_file['metadata/questions/count'][...]
    n_questions = int(np.asarray(count_raw).item())

    questions_ds = data_file['data/questions']
    sentences_ds = data_file['data/sentences']

    for i in range(n_questions):
        row_labels = data_file['data/labels/q%d' % i][...]
        rows = row_labels[:, 0]
        labels = row_labels[:, 1]
        # NOTE(review): the same index array selects from both datasets;
        # if these are h5py datasets the indices presumably need to be in
        # increasing order -- confirm against how the file is written.
        questions = questions_ds[rows, ...]
        sentences = sentences_ds[rows, ...]
        yield DataSet(questions, sentences, labels)
评论列表
文章目录