def __init__(self, rtepair, stop=True, lemmatize=False):
    """
    :param rtepair: a ``(text, hypothesis)`` pair from which features should be extracted
    :param stop: if ``True``, stopwords are thrown away.
    :type stop: bool
    :param lemmatize: if ``True``, lemmatize the tokens (left as a no-op for Chinese below).
    :type lemmatize: bool
    """
    global stop_word_path
    self.stop = stop
    # Load the stopword list as a set of words; reading the file as one raw
    # string would make the ``in`` checks below do substring matching.
    with codecs.open(stop_word_path + 'stopwords.txt', encoding='UTF-8') as f:
        self.stopwords = set(f.read().split())
    # Chinese negation cues. The original words were corrupted by an encoding
    # error; the set below is a plausible stand-in.
    self.negwords = set([u"不", u"没", u"没有", u"不是", u"不能", u"未能", u"否认", u"拒绝", u"并非"])
    # Segment text and hypothesis with jieba, keeping part-of-speech tags
    text_words = pseg.lcut(rtepair[0])
    hyp_words = pseg.lcut(rtepair[1])
    self.text_words = set()
    self.hyp_words = set()
    # Placeholder: special handling of tokens (numbers, URLs, abbreviations, ...)
    pass
    # Placeholder: WordNet-style lemmatization does not apply to the Chinese segmentation used here
    if lemmatize:
        pass
    # Filter out stopwords (only when ``stop`` is True, as documented)
    for word, flag in text_words:
        if not self.stop or word not in self.stopwords:
            self.text_words.add((word, flag))
    for word, flag in hyp_words:
        if not self.stop or word not in self.stopwords:
            self.hyp_words.add((word, flag))
    # Set operations over the (word, POS) pairs
    self._overlap = self.hyp_words & self.text_words     # shared by hyp and text
    self._hyp_extra = self.hyp_words - self.text_words   # in hyp but not in text
    self._txt_extra = self.text_words - self.hyp_words   # in text but not in hyp
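A minimal usage sketch follows. The class name RTEFeatureExtractor and the example sentences are assumptions for illustration (the class name is not shown in this snippet); only ``negwords`` and the ``_overlap`` / ``_hyp_extra`` / ``_txt_extra`` attributes come from the method above.

# Hypothetical usage: assumes the method above belongs to a class named
# RTEFeatureExtractor and that stopwords.txt is available at stop_word_path.
text = u"小明今天在北京参加了一个会议"
hyp = u"小明没有去北京"

extractor = RTEFeatureExtractor((text, hyp), stop=True)

# Simple overlap features in the spirit of NLTK's rte_features()
features = {
    "word_overlap": len(extractor._overlap),
    "word_hyp_extra": len(extractor._hyp_extra),
    # Negation words appearing on only one side of the pair
    "neg_txt": len(set(w for w, _ in extractor._txt_extra) & extractor.negwords),
    "neg_hyp": len(set(w for w, _ in extractor._hyp_extra) & extractor.negwords),
}
print(features)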
Source: textual_entailment.py (Python)