def __init__(self, input=None, topicDict=None, opinionDict=None,
             testSplit=None, file_dict=None, topicLines=None,
             opinionLines=None):
    """Create one Perspective per input source and set up the dictionaries.

    The perspectives are built from ``file_dict`` when it is given,
    otherwise from the directories listed in ``input``.

    Args:
        input: list of directory paths, one per perspective. The ``*.txt``
            files found in each directory are passed to that directory's
            ``Perspective``. The list is sorted in place (the caller's
            list is modified). Required when ``file_dict`` is None.
        topicDict: either a string, which is handed to
            ``load_dictionaries`` (presumably a file path -- confirm
            against that method), or a ``corpora.Dictionary`` that is
            stored as ``self.topicDictionary``.
        opinionDict: same as ``topicDict``, but for
            ``self.opinionDictionary``.
        testSplit: forwarded to every ``Perspective`` built from ``input``
            and stored as ``self.testSplit``.
        file_dict: mapping looked up with the keys ``'0'``, ``'1'``, ...
            up to ``len(file_dict) - 1``; each value is passed to
            ``Perspective(file_dict=...)``.
        topicLines: forwarded to every ``Perspective``; defaults to
            ``[0]``.
        opinionLines: forwarded to every ``Perspective``; defaults to
            ``[1]``.

    Raises:
        TypeError: if both ``input`` and ``file_dict`` are None.
    """
    # Fresh lists per call: a list literal in the signature would be a
    # single object shared by every instance (and by every Perspective it
    # is forwarded to).
    if topicLines is None:
        topicLines = [0]
    if opinionLines is None:
        opinionLines = [1]

    # ``unicode`` only exists on Python 2; referencing it unguarded raises
    # NameError on Python 3 as soon as the value checked is not a ``str``.
    try:
        string_types = (str, unicode)  # noqa: F821
    except NameError:
        string_types = (str,)

    if file_dict is not None:
        logger.info('initialize CPT Corpus with file_dict: {} perspectives'
                    .format(len(file_dict)))
        # Perspectives are looked up by their index rendered as a string;
        # a missing key yields ``Perspective(file_dict=None, ...)``.
        self.perspectives = [Perspective(file_dict=file_dict.get(str(p)),
                                         topicLines=topicLines,
                                         opinionLines=opinionLines)
                             for p in range(len(file_dict))]
        # NOTE(review): ``self.input`` is not set on this path, exactly as
        # in the previous implementation.
    else:
        if input is None:
            # Previously surfaced as an opaque ``len(None)`` TypeError.
            raise TypeError('either input or file_dict must be provided')
        logger.info('initialize CPT Corpus with {} perspectives'
                    .format(len(input)))
        # In-place sort is kept on purpose so that the order of the
        # perspectives and of ``self.input`` is unchanged for callers.
        input.sort()
        self.perspectives = [Perspective(input=glob.glob('{}/*.txt'.
                                                         format(d)),
                                         testSplit=testSplit,
                                         topicLines=topicLines,
                                         opinionLines=opinionLines)
                             for d in input]
        self.input = input

    if isinstance(topicDict, string_types):
        self.load_dictionaries(topicDict=topicDict)
    elif isinstance(topicDict, corpora.Dictionary):
        self.topicDictionary = topicDict

    if isinstance(opinionDict, string_types):
        self.load_dictionaries(opinionDict=opinionDict)
    elif isinstance(opinionDict, corpora.Dictionary):
        self.opinionDictionary = opinionDict

    # Truthiness check: runs when either argument is missing (or falsy).
    if not topicDict or not opinionDict:
        self._create_corpus_wide_dictionaries()

    self.testSplit = testSplit
    self.nPerspectives = len(self.perspectives)
评论列表
文章目录