word2veckeras.py 文件源码-python代码片段

word2veckeras.py 文件源码

python

阅读 28 收藏 0 点赞 0 评论 0

项目：word2vec-keras-in-gensim 作者: niitsuma 项目源码文件源码

def train_batch_cbow_xy_generator(model, sentences):
    for sentence in sentences:
        word_vocabs = [model.vocab[w] for w in sentence if w in model.vocab and  model.vocab[w].sample_int > model.random.rand() * 2**32]
        for pos, word in enumerate(word_vocabs):
            reduced_window = model.random.randint(model.window)  # `b` in the original word2vec code
            start = max(0, pos - model.window + reduced_window)
            window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start)
            word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]
            xy_gen=train_cbow_pair(model, word , word2_indices , None, None)
            for xy in xy_gen:
                if xy !=None:
                    yield xy