def preprocess_input_sequences(self, data, shuffle=True):
    """
    Convert raw variable-length sequences into fixed-size model inputs.

    Steps:
      1. Optionally pass ``data`` through ``self.union_shuffle`` (presumably
         shuffling the parallel sequences in unison -- confirm against that
         helper).
      2. Pad/truncate questions, documents and candidates at the end
         ("post") to ``self.q_len``, ``self.d_len`` and ``self.A_len``.
      3. Build a float32 mask over document positions: 1.0 where the position
         is inside the original (pre-padding) document, 0.0 otherwise.
      4. Build ``y_true`` with the same shape/dtype as the padded candidates,
         with column 0 set to 1 and every other column 0.
         NOTE(review): this looks like a one-hot target that assumes the
         correct answer is always the first candidate -- confirm with the
         data producer.

    Args:
        data: An iterable that unpacks into
            ``(documents, questions, answer, candidates)``. ``answer`` is not
            used by this method.
        shuffle: When true, ``data`` is first passed to ``self.union_shuffle``.

    Returns:
        Tuple ``(questions_ok, documents_ok, context_mask, candidates_ok,
        y_true)``.
    """
    documents, questions, _answer, candidates = (
        self.union_shuffle(data) if shuffle else data
    )

    # Lengths are taken BEFORE padding/truncation so the mask reflects the
    # real document extent.
    d_lens = np.asarray([len(doc) for doc in documents], dtype=np.int64)

    questions_ok = pad_sequences(
        questions, maxlen=self.q_len, dtype="int32", padding="post", truncating="post"
    )
    documents_ok = pad_sequences(
        documents, maxlen=self.d_len, dtype="int32", padding="post", truncating="post"
    )

    # Same result as tf.sequence_mask(d_lens, self.d_len, dtype=float32):
    # mask[i, j] = 1.0 iff j < d_lens[i]. Done in NumPy so preprocessing does
    # not add ops to the TensorFlow graph or need a session on every call.
    # Documents longer than self.d_len simply get an all-ones row.
    positions = np.arange(self.d_len)
    context_mask = (positions[None, :] < d_lens[:, None]).astype(np.float32)

    candidates_ok = pad_sequences(
        candidates, maxlen=self.A_len, dtype="int32", padding="post", truncating="post"
    )

    # Target: column 0 is marked as the positive class for every sample.
    y_true = np.zeros_like(candidates_ok)
    y_true[:, 0] = 1

    return questions_ok, documents_ok, context_mask, candidates_ok, y_true
# (Web-page residue, not code: "评论列表" = comment list, "文章目录" = table of contents.)