def __init__(self, rtepair, stop=True, lemmatize=False):
    """
    :param rtepair: a ``(text, hypothesis)`` pair from which features should be extracted
    :param stop: if ``True``, stopwords are thrown away.
    :type stop: bool
    :param lemmatize: if ``True``, lemmatize the tokens (left as a no-op for Chinese below).
    :type lemmatize: bool
    """
    global stop_word_path
    self.stop = stop
    # Load the stopword list as a set of words; reading the file as one raw
    # string would make the ``in`` checks below do substring matching.
    with codecs.open(stop_word_path + 'stopwords.txt', encoding='UTF-8') as f:
        self.stopwords = set(f.read().split())
    # Chinese negation cues. The original words were corrupted by an encoding
    # error; the set below is a plausible stand-in.
    self.negwords = set([u"不", u"没", u"没有", u"不是", u"不能", u"未能", u"否认", u"拒绝", u"并非"])
    # Segment text and hypothesis with jieba, keeping part-of-speech tags
    text_words = pseg.lcut(rtepair[0])
    hyp_words = pseg.lcut(rtepair[1])
    self.text_words = set()
    self.hyp_words = set()
    # Placeholder: special handling of tokens (numbers, URLs, abbreviations, ...)
    pass
    # Placeholder: WordNet-style lemmatization does not apply to the Chinese segmentation used here
    if lemmatize:
        pass
    # Filter out stopwords (only when ``stop`` is True, as documented)
    for word, flag in text_words:
        if not self.stop or word not in self.stopwords:
            self.text_words.add((word, flag))
    for word, flag in hyp_words:
        if not self.stop or word not in self.stopwords:
            self.hyp_words.add((word, flag))
    # Set operations over the (word, POS) pairs
    self._overlap = self.hyp_words & self.text_words     # shared by hyp and text
    self._hyp_extra = self.hyp_words - self.text_words   # in hyp but not in text
    self._txt_extra = self.text_words - self.hyp_words   # in text but not in hyp
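A minimal usage sketch follows. The class name RTEFeatureExtractor and the example sentences are assumptions for illustration (the class name is not shown in this snippet); only ``negwords`` and the ``_overlap`` / ``_hyp_extra`` / ``_txt_extra`` attributes come from the method above.

# Hypothetical usage: assumes the method above belongs to a class named
# RTEFeatureExtractor and that stopwords.txt is available at stop_word_path.
text = u"小明今天在北京参加了一个会议"
hyp = u"小明没有去北京"

extractor = RTEFeatureExtractor((text, hyp), stop=True)

# Simple overlap features in the spirit of NLTK's rte_features()
features = {
    "word_overlap": len(extractor._overlap),
    "word_hyp_extra": len(extractor._hyp_extra),
    # Negation words appearing on only one side of the pair
    "neg_txt": len(set(w for w, _ in extractor._txt_extra) & extractor.negwords),
    "neg_hyp": len(set(w for w, _ in extractor._hyp_extra) & extractor.negwords),
}
print(features)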
Source: textual_entailment.py (Python)