def train(self, sentences, total_words=None, word_count=0,
          total_examples=None, queue_factor=2, report_delay=1.0):
    """
    Update the model's neural weights from a sequence of sentences (can be a once-only generator stream).
    For Word2Vec, each sentence must be a list of unicode strings. (Subclasses may accept other examples.)

    To support linear learning-rate decay from (initial) alpha to min_alpha, either total_examples
    (count of sentences) or total_words (count of raw words in sentences) should be provided, unless the
    sentences are the same as those that were used to initially build the vocabulary.

    When `self.bucket > 0`, `sentences` is wrapped as
    `HashIter(sentences, self.bucket, with_labels=True)` before being passed on.
    All arguments are then forwarded positionally to the parent class's `train`,
    and whatever that call returns is returned unchanged.

    Raises:
        RuntimeError: if `self.model_trimmed_post_training` is truthy. This is
            checked before anything else, so the input is never touched in that case.
    """
    # Fail fast: refuse to train before doing any work on the input stream.
    if self.model_trimmed_post_training:
        raise RuntimeError("Parameters for training were discarded using model_trimmed_post_training method")

    if self.bucket > 0:
        sentences = HashIter(sentences, self.bucket, with_labels=True)

    if FAST_VERSION < 0:
        # Imported lazily: only needed on the slow (no C extension) path.
        import warnings
        warnings.warn("C extension not loaded for Word2Vec, training will be slow. "
                      "Install a C compiler and reinstall gensim for fast training.")
        self.neg_labels = []
        if self.negative > 0:
            # precompute negative labels optimization for pure-python training:
            # an array of self.negative + 1 entries, with only the first set to 1
            self.neg_labels = zeros(self.negative + 1)
            self.neg_labels[0] = 1.

    return super(LabeledWord2Vec, self).train(sentences, total_words, word_count,
                                              total_examples, queue_factor, report_delay)
# (Web-page navigation residue, not part of the code: "Comment list" / "Article table of contents".)