def createDictionary(extraLabel=""):
    """Build the vocabulary dictionary, prune it, save it to disk and return it.

    The dictionary is built by passing a project-level ``Dictionary()``
    object to ``corpora.Dictionary`` (presumably gensim's -- confirm), then
    filtered with ``filter_extremes(no_below=10, no_above=0.6, keep_n=None)``
    and compacted. The labels returned by ``get_visual_terms_labels(config)``
    are added afterwards as one extra document, so the filtering step above
    is not applied to them.

    Two files are written under ``data/dics/``, sharing the base name
    ``<pretty_current_time()>_<extraLabel>_<config.dictionary_label>.dict``:
    a ``.bin`` file via ``save`` and a ``.txt`` file via ``save_as_text``.
    The ``.bin`` path is also handed to ``setLastDictFileName``.

    Args:
        extraLabel: Optional prefix for the label part of the file name. It
            is always joined to ``config.dictionary_label`` with ``"_"``,
            even when left empty.

    Returns:
        The filtered dictionary object, including the visual-term labels.
    """
    # TODO in the report note the optimization done on the dict - it was
    # ~700 000 words, now ~90 000
    vocabulary = corpora.Dictionary(Dictionary())
    vocabulary.filter_extremes(no_below=10, no_above=0.6, keep_n=None)
    vocabulary.compactify()

    # The visual terms become words of the vocabulary as well; they are
    # added only after the pruning above has run.
    vocabulary.add_documents([get_visual_terms_labels(config)])

    full_label = extraLabel + "_" + config.dictionary_label
    base_path = 'data/dics/%s_%s.dict' % (pretty_current_time(), full_label)
    binary_path = base_path + '.bin'

    vocabulary.save(binary_path)
    vocabulary.save_as_text(base_path + '.txt')
    setLastDictFileName(binary_path)

    # The logged path is the shared base name, without the .bin/.txt suffix.
    logger.info(
        'Dict created and saved to %s. Size: %i' % (base_path, len(vocabulary))
    )
    return vocabulary
评论列表
文章目录