fill_db.py 文件源码

python
阅读 39 收藏 0 点赞 0 评论 0

项目:corporadb 作者: nlesc-sherlock 项目源码 文件源码
def create_dummy_data(self):
    """Populate this instance from ``self.dataset`` for the 'sherlock' set.

    Sets ``datasetname``, hands the dataset's metadata to
    ``read_metadata_json``, and stores the vocabulary triple, both topic
    matrices, the topic count, the e-mail count and the topic distance
    matrix as attributes. The topic count is also printed to stdout.

    NOTE(review): ``email_prob`` is filled from ``getWordsInTopicMatrix()``
    and ``wordprob`` from ``getDocsInTopicMatrix()``, i.e. the names are
    crossed relative to the getters. Earlier (commented-out) code assigned
    them the other way round -- confirm the swap is intentional.
    """
    self.datasetname = 'sherlock'

    raw_metadata = self.dataset.getMetadata()
    self.read_metadata_json(raw_metadata)

    vocabulary = self.dataset.loadVocabulary()
    self.worddict, self.lenwords, self.randwords = vocabulary

    # Words-in-topic matrix; its first axis length is taken as the
    # number of topics.
    words_in_topic = self.dataset.getWordsInTopicMatrix()
    self.email_prob = words_in_topic
    self.numtopics = numpy.shape(words_in_topic)[0]
    print(self.numtopics)

    # presumably self.metadata is filled in by read_metadata_json above;
    # verify against that method.
    self.num_emails = len(self.metadata)

    # Docs-in-topic matrix, which is also the input for the
    # topic-to-topic distance matrix.
    docs_in_topic = self.dataset.getDocsInTopicMatrix()
    self.wordprob = docs_in_topic
    self.distance_matrix = self.dataset.getTopicDistanceMatrix(docs_in_topic)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号