# reader.py -- source code (Python snippet)

def load_w2v(corpus, dictionary):
    """
    Return the Word2Vec model stored at W2V_MODEL_PATH.

    If no file exists at W2V_MODEL_PATH, a model is first trained on the
    sentences returned by get_review_sentences() and saved to that path.
    The model is then always loaded from disk, whether or not training
    happened during this call.

    :param corpus: Unused; kept so that existing callers keep working.
    :param dictionary: Unused; kept so that existing callers keep working.
    :return: The model returned by models.Word2Vec.load(W2V_MODEL_PATH).
    """
    if not os.path.isfile(W2V_MODEL_PATH):
        num_features = 300    # Word vector dimensionality
        min_word_count = 5    # Minimum word count
        num_workers = 5       # Number of threads to run in parallel
        window = 5            # Context window size
        downsampling = 1e-5   # Downsample setting for frequent words

        print("Training the word2vec model!")
        sents = get_review_sentences()

        # Initialize and train the model (this will take some time).
        # NOTE(review): `size=` and `init_sims()` look like the pre-4.0
        # gensim API -- confirm the installed gensim version before upgrading.
        model = models.Word2Vec(
            sents,
            workers=num_workers,
            size=num_features,
            min_count=min_word_count,
            window=window,
            sample=downsampling,
        )

        # The model is not trained any further after this point, so
        # init_sims(replace=True) is called to make it more memory-efficient.
        model.init_sims(replace=True)

        # Persist the model so later calls skip training and just load it.
        model.save(W2V_MODEL_PATH)
        print('Word2vec model created!')

    print('Loading word2vec model')
    w2v = models.Word2Vec.load(W2V_MODEL_PATH)
    print('Loading word2vec model complished!')
    return w2v