CPTCorpus.py 文件源码

python
阅读 21 收藏 0 点赞 0 评论 0

项目:cptm 作者: NLeSC 项目源码 文件源码
def __init__(self, input=None, topicDict=None, opinionDict=None,
             testSplit=None, file_dict=None, topicLines=[0],
             opinionLines=[1]):
    """Build one Perspective per source and attach the dictionaries.

    Exactly one of two sources is used; `file_dict` wins when both are
    given.

    Args:
        input: iterable of directory names. The names are sorted and, for
            each directory, the files matching ``<dir>/*.txt`` are handed
            to a Perspective. The sorted list is kept as ``self.input``.
            Ignored when `file_dict` is given.
        topicDict: a str/unicode value is forwarded to
            ``self.load_dictionaries(topicDict=...)``; a
            ``corpora.Dictionary`` is stored as ``self.topicDictionary``.
        opinionDict: same handling as `topicDict`, stored as
            ``self.opinionDictionary``.
        testSplit: forwarded to every Perspective created from `input`
            (not used for `file_dict`) and stored as ``self.testSplit``.
        file_dict: mapping looked up with the keys ``'0'``, ``'1'``, ...
            up to ``len(file_dict) - 1``; each value is handed to a
            Perspective.
        topicLines: forwarded unchanged to every Perspective.
        opinionLines: forwarded unchanged to every Perspective.

    Raises:
        TypeError: if neither `input` nor `file_dict` is provided.
    """
    # NOTE(review): the list defaults above are shared between calls.
    # This method only passes them on; confirm that Perspective does not
    # mutate them.
    if file_dict is None and input is None:
        # Fail with a readable message instead of the opaque
        # "object of type 'NoneType' has no len()" raised further down.
        raise TypeError('either input or file_dict must be provided')

    if file_dict is not None:
        logger.info('initialize CPT Corpus with file_dict: {} perspectives'
                    .format(len(file_dict)))
        # Entries are addressed by the string form of their index; a
        # missing key yields None for that perspective.
        self.perspectives = [
            Perspective(file_dict=file_dict.get(str(p)),
                        topicLines=topicLines,
                        opinionLines=opinionLines)
            for p in range(len(file_dict))]
    else:
        logger.info('initialize CPT Corpus with {} perspectives'
                    .format(len(input)))
        # Sort a copy so the caller's sequence is left untouched while
        # the perspective order stays deterministic.
        input = sorted(input)
        self.perspectives = [
            Perspective(input=glob.glob('{}/*.txt'.format(d)),
                        testSplit=testSplit,
                        topicLines=topicLines,
                        opinionLines=opinionLines)
            for d in input]
        self.input = input

    # A string argument is delegated to load_dictionaries; a ready-made
    # Dictionary object is used as is.
    if isinstance(topicDict, str) or isinstance(topicDict, unicode):
        self.load_dictionaries(topicDict=topicDict)
    elif isinstance(topicDict, corpora.Dictionary):
        self.topicDictionary = topicDict

    if isinstance(opinionDict, str) or isinstance(opinionDict, unicode):
        self.load_dictionaries(opinionDict=opinionDict)
    elif isinstance(opinionDict, corpora.Dictionary):
        self.opinionDictionary = opinionDict

    # NOTE(review): this is a truthiness test, so it also fires when only
    # one of the two dictionaries is missing (or is falsy); verify that
    # _create_corpus_wide_dictionaries copes with that case.
    if not topicDict or not opinionDict:
        self._create_corpus_wide_dictionaries()

    self.testSplit = testSplit
    self.nPerspectives = len(self.perspectives)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号