def __init__(self, loss='softmax', bucket=0, **kwargs):
"""
Takes the same arguments as the parent class `Word2Vec <https://radimrehurek.com/gensim/models/word2vec.html#gensim.models.word2vec.Word2Vec>`_.
Some parameter values are overwritten (e.g. `sg=0`, because skip-gram is never used here); see the code for details.
All arguments must be passed by keyword!
`loss` is one value in {"ns", "hs", "softmax"}. With "ns", negative sampling is used as the
loss function, together with the `negative` parameter; with "hs", hierarchical softmax is used;
with "softmax" (the default), the standard softmax function, of which the other two are approximations.
The parent's `hs` argument no longer exists.
`bucket` is the maximum number of hashed words, i.e., the word feature space is capped at this size
via the hashing trick on the word vocabulary. Defaults to 0, which disables the hashing trick.
The constructor builds two vocabularies, one for the sample words and one for the labels,
so that the input layer is made only of words, while the output layer is made only of labels.
**Parent class methods that are not overridden here are not tested and not safe to use**.
"""
self.lvocab = {} # Vocabulary of labels only
self.index2label = []  # Mapping from label indices to labels, mirroring `index2word`
kwargs['sg'] = 0  # Never use skip-gram (see docstring)
kwargs['window'] = sys.maxsize  # Treat the whole sample as the context window
kwargs['sentences'] = None  # Do not train in the constructor; vocabularies are built later
kwargs['hashfxn'] = custom_hash # Force a consistent function across different Python versions
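# `init_loss` (defined elsewhere in this class) presumably maps `loss` onto the
# parent's flags: 'hs' -> hs=1 with negative=0, 'ns' -> hs=0 keeping `negative`,
# 'softmax' -> hs=0 with negative=0; it returns whether plain softmax is in use.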
self.softmax = self.init_loss(kwargs, loss)
self.bucket = bucket  # Number of hash buckets for words (0 = no hashing trick)
super(LabeledWord2Vec, self).__init__(**kwargs)
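
# A minimal sketch of the hashing trick that `bucket` enables (illustrative only;
# the module's actual `custom_hash` may differ): each word is mapped to one of
# `bucket` feature slots by a deterministic string hash, so the input embedding
# matrix has at most `bucket` rows regardless of vocabulary size. The hash must be
# deterministic because Python's builtin `hash()` varies across interpreter runs,
# which is also why `hashfxn` is forced to `custom_hash` above.

import zlib

def bucket_index(word, bucket=1000):
    """Map a word to one of `bucket` slots (hypothetical helper)."""
    return zlib.crc32(word.encode('utf8')) % bucket

# Hypothetical construction, with parameter names taken from the parent Word2Vec
# API (`size`, `negative`); training data and entry points depend on the rest of
# this module:
# model = LabeledWord2Vec(loss='ns', negative=10, size=100, bucket=100000)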