def create_dummy_data(self):
    """Load the 'sherlock' dataset artefacts onto this instance.

    Reads everything from ``self.dataset`` and stores the results as
    attributes: ``datasetname``, ``worddict``, ``lenwords``, ``randwords``,
    ``email_prob``, ``numtopics``, ``num_emails``, ``wordprob`` and
    ``distance_matrix``. The topic count is also printed to stdout.

    NOTE(review): ``email_prob`` is filled from ``getWordsInTopicMatrix()``
    and ``wordprob`` from ``getDocsInTopicMatrix()``. An earlier revision
    (previously kept as commented-out lines) used the opposite pairing.
    Confirm the swap is intentional before relying on the attribute names.
    """
    dataset = self.dataset
    self.datasetname = 'sherlock'

    # Presumably read_metadata_json populates self.metadata, which is
    # read further down -- verify against its definition.
    raw_metadata = dataset.getMetadata()
    self.read_metadata_json(raw_metadata)

    vocabulary = dataset.loadVocabulary()
    self.worddict, self.lenwords, self.randwords = vocabulary

    # Matrix from the words-in-topic accessor; its first dimension is
    # taken as the number of topics.
    words_in_topic = dataset.getWordsInTopicMatrix()
    self.email_prob = words_in_topic
    topic_count = numpy.shape(words_in_topic)[0]
    self.numtopics = topic_count
    print(topic_count)

    self.num_emails = len(self.metadata)

    # Matrix from the docs-in-topic accessor; it also feeds the
    # topic-to-topic distance computation.
    docs_in_topic = dataset.getDocsInTopicMatrix()
    self.wordprob = docs_in_topic
    self.distance_matrix = dataset.getTopicDistanceMatrix(docs_in_topic)
# (Stray web-page navigation labels, "comment list" / "table of contents"; not part of the code.)