def initialize(self, corpus):
    """
    Initialize the random projection matrix.

    Works out the number of terms and then stores a dense
    `num_topics` x `num_terms` matrix of random +1/-1 entries in
    `self.projection`.

    `corpus` is only consulted when `self.id2word` is None; in that case it is
    handed to `utils.dict_from_corpus` to build the id->word mapping, and the
    number of terms is the size of that mapping. Otherwise the number of terms
    is one more than the largest key of `self.id2word` (0 for an empty mapping).

    Sets `self.num_terms` and `self.projection` (and `self.id2word` when it
    was None). Returns nothing.
    """
    if self.id2word is None:
        logger.info("no word id mapping provided; initializing from corpus, assuming identity")
        self.id2word = utils.dict_from_corpus(corpus)
        self.num_terms = len(self.id2word)
    else:
        # `keys()` is a view object on Python 3 and cannot be concatenated with a
        # list, so materialize it first. The -1 sentinel keeps `max` well-defined
        # for an empty mapping, giving num_terms == 0.
        self.num_terms = 1 + max([-1] + list(self.id2word.keys()))

    shape = self.num_topics, self.num_terms
    # Pass the argument separately so formatting is deferred to the logger.
    logger.info("constructing %s random matrix", str(shape))

    # Now construct the projection matrix itself.
    # This uses a particular form, derived in "Achlioptas: Database-friendly random projection",
    # and his (1) scenario of Theorem 1.1 in particular (all entries are +1/-1).
    randmat = 1 - 2 * numpy.random.binomial(1, 0.5, shape)  # convert from 0/1 to +1/-1

    # Convert from integers to float32 in column-major (Fortran) layout, for faster multiplications.
    self.projection = numpy.asfortranarray(randmat, dtype=numpy.float32)
评论列表
文章目录