import numpy as np
from keras.preprocessing import sequence as keras_seq


def preprocess_batch(self, captions_label_encoded):
    # `self._tokenizer` is a keras Tokenizer already fitted on the captions.
    # Pad every label-encoded caption to the length of the longest one.
    captions = keras_seq.pad_sequences(captions_label_encoded,
                                       padding='post')
    # Pad to one extra timestep: the model outputs maxlen(captions) + 1
    # words, because the first "word" fed into it is the image.
    captions_extended1 = keras_seq.pad_sequences(captions,
                                                 maxlen=captions.shape[-1] + 1,
                                                 padding='post')
    # One-hot encode each caption; shape becomes (batch, maxlen + 1, num_words).
    captions_one_hot = list(map(self._tokenizer.sequences_to_matrix,
                                np.expand_dims(captions_extended1, -1)))
    captions_one_hot = np.array(captions_one_hot, dtype='int')
    # Decrease/shift every word index by 1.
    # Dropping the first one-hot column turns the padding word
    # (index=0, encoded=[1, 0, ...]) into all zeros ([0, 0, ...]),
    # so its cross-entropy loss will be zero.
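    # For example, with a one-hot width of 4:
    #   index 0 (padding) -> one-hot [1, 0, 0, 0] -> shifted [0, 0, 0]
    #   index 2           -> one-hot [0, 0, 1, 0] -> shifted [0, 1, 0]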
    captions_decreased = captions.copy()
    captions_decreased[captions_decreased > 0] -= 1
    captions_one_hot_shifted = captions_one_hot[:, :, 1:]
    captions_input = captions_decreased
    captions_output = captions_one_hot_shifted
    return captions_input, captions_output
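

# A minimal usage sketch (hypothetical driver code; in the original project
# `preprocess_batch` is a method on a dataset class whose `self._tokenizer`
# is a keras Tokenizer fitted on the caption corpus):
if __name__ == '__main__':
    from keras.preprocessing.text import Tokenizer

    class _FakeDataset(object):
        pass

    texts = ['a dog runs on the grass', 'a cat sits on a mat']
    dataset = _FakeDataset()
    dataset._tokenizer = Tokenizer()
    dataset._tokenizer.fit_on_texts(texts)

    captions_label_encoded = dataset._tokenizer.texts_to_sequences(texts)
    # Call the method as a plain function, passing the fake dataset as `self`.
    captions_input, captions_output = preprocess_batch(dataset,
                                                       captions_label_encoded)
    # captions_input : (batch, maxlen)      -- word indices shifted down by 1
    # captions_output: (batch, maxlen + 1, ...) -- one-hot targets with one
    #                   extra timestep; padded timesteps are all-zero rows
    print(captions_input.shape, captions_output.shape)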