def _handle_rare_words(self, captions):
    """Apply the configured rare-word policy to ``captions``.

    The policy is read from ``self._rare_words_handling``:

    * ``'nothing'`` -- ``captions`` is returned untouched (the very same
      object that was passed in).
    * ``'discard'`` -- every word whose count across all captions is below
      ``self._words_min_occur`` is dropped, and each caption is rebuilt by
      joining its surviving words with single spaces.

    Args:
        captions: Iterable of caption strings.

    Returns:
        ``captions`` itself for ``'nothing'``; for ``'discard'`` a new list
        of strings, one per input caption, in input order.

    Raises:
        NotImplementedError: If the policy is neither ``'nothing'`` nor
            ``'discard'``.
    """
    policy = self._rare_words_handling
    if policy == 'nothing':
        return captions

    if policy == 'discard':
        # The captions are walked twice (once to count, once to filter), so
        # materialise them first: a one-shot iterator would be exhausted by
        # the counting pass and the result would silently come back empty.
        captions = list(captions)

        tokenizer = Tokenizer()
        tokenizer.fit_on_texts(captions)
        word_counts = tokenizer.word_counts
        min_occur = self._words_min_occur

        # NOTE(review): this presumes text_to_word_sequence normalises and
        # splits text the same way the default Tokenizer does, so the
        # look-ups into word_counts hit -- confirm against the library.
        return [
            ' '.join(word
                     for word in text_to_word_sequence(caption)
                     if word_counts.get(word, 0) >= min_occur)
            for caption in captions
        ]

    raise NotImplementedError('rare_words_handling={} is not implemented '
                              'yet!'.format(policy))
# (Web-page navigation residue, not part of the code: "Comment list", "Table of contents".)