def _convert_obj(self, obj):
    """Encode one raw example mapping as word-id sequences plus a label id.

    Reads the ``'sentence1'``, ``'sentence2'`` and ``'gold_label'`` entries
    of ``obj``. Both sentences are lowercased when ``self.lower`` is truthy,
    split with ``word_tokenize`` and mapped token by token through
    ``self.word_vocab.word_to_id``.

    Returns:
        ``None`` when either id sequence is longer than ``self._max_length``
        or when the gold label is the string ``'-'``; otherwise the tuple
        ``(premise_ids, hypothesis_ids, premise_len, hypothesis_len,
        label_id)``, where ``label_id`` comes from
        ``self.label_vocab.word_to_id``.
    """
    premise_text = obj['sentence1']
    hypothesis_text = obj['sentence2']
    if self.lower:
        premise_text, hypothesis_text = (
            premise_text.lower(), hypothesis_text.lower())

    premise_tokens = word_tokenize(premise_text)
    hypothesis_tokens = word_tokenize(hypothesis_text)

    # Encode the premise first, then the hypothesis.
    premise_ids, hypothesis_ids = (
        [self.word_vocab.word_to_id(token) for token in tokens]
        for tokens in (premise_tokens, hypothesis_tokens)
    )
    premise_len, hypothesis_len = len(premise_ids), len(hypothesis_ids)

    gold = obj['gold_label']
    # Reject over-long pairs, then examples labelled '-'
    # (presumably "no gold label" -- confirm against the dataset spec).
    too_long = (premise_len > self._max_length
                or hypothesis_len > self._max_length)
    if too_long or gold == '-':
        return None

    label_id = self.label_vocab.word_to_id(gold)
    return premise_ids, hypothesis_ids, premise_len, hypothesis_len, label_id
# (web page residue: "Comment list" / "Table of contents")