def generateTagCloudForGroupV2(self, groupName, userName=None):
    """Render a TF-IDF weighted tag cloud image and return its file name.

    The target document is built from the most recent ``self.recordMaxNum``
    records sent to ``groupName`` (restricted to sender ``userName`` when one
    is given). Its terms are weighted against background documents built
    from up to ``self.recordMaxNum * 5`` recent records:

    * without ``userName``: records sent to any other ``to`` value, one
      document per distinct ``to``;
    * with ``userName``: records sent to the same ``groupName`` by other
      senders, one document per distinct ``from``.

    Records whose content starts with ``<<<IMG`` (image records) are skipped.

    Args:
        groupName: value matched against the ``to`` field of stored records.
        userName: optional value matched against the ``from`` field.

    Returns:
        The name produced by ``self.generateTmpFileName()``, after the
        rendered image has been saved under it.
    """
    newestFirst = [('timestamp', DESCENDING)]
    if userName is None:
        targetQuery = {'to': groupName}
        backgroundQuery = {'to': {'$ne': groupName}}
        groupField = 'to'
    else:
        targetQuery = {'from': userName, 'to': groupName}
        backgroundQuery = {'from': {'$ne': userName}, 'to': groupName}
        # Every background record here shares the same 'to', so the
        # background documents must be split per sender instead.
        groupField = 'from'

    def groupKey(record):
        return record[groupField]

    def tokenize(messages):
        # Drop image records, then segment the concatenated text.
        texts = [m['content'] for m in messages
                 if re.match('<<<IMG', m['content']) is None]
        return list(jieba.cut(' '.join(texts)))

    records = self.coll.find(targetQuery).sort(newestFirst).limit(self.recordMaxNum)
    allRecords = self.coll.find(backgroundQuery).sort(newestFirst).limit(self.recordMaxNum * 5)

    # groupby only merges adjacent items, so the sort key and the grouping
    # key must be the same function.
    sortedBackground = sorted(allRecords, key=groupKey)
    docThisGroup = tokenize(records)
    docsOtherGroups = [tokenize(members)
                       for _, members in itertools.groupby(sortedBackground, groupKey)]

    # The target document goes first so its bag-of-words is bows[0].
    docs = [docThisGroup] + docsOtherGroups
    dictionary = gensim.corpora.Dictionary(docs)
    bows = [dictionary.doc2bow(doc) for doc in docs]
    id2token = {tokenId: token for token, tokenId in dictionary.token2id.items()}
    tfidf = gensim.models.tfidfmodel.TfidfModel(corpus=bows)
    tagCloudFrequencies = {id2token[tokenId]: weight for tokenId, weight in tfidf[bows[0]]}

    # NOTE(review): tagCloudFrequencies can be empty (no records, or no term
    # with a non-zero weight) -- confirm how self.wordCloud handles that.
    img = self.wordCloud.generate_from_frequencies(tagCloudFrequencies).to_image()
    fn = self.generateTmpFileName()
    img.save(fn)
    return fn
# Generate a tag cloud image from the latest self.recordMaxNum messages. Return the file name.
评论列表
文章目录