scoreword2veckeras.py 文件源码-python代码片段

scoreword2veckeras.py 文件源码

python

阅读 22 收藏 0 点赞 0 评论 0

项目：word2vec-keras-in-gensim 作者: niitsuma 项目源码文件源码

def train_batch_score_cbow_xy_generator(model, scored_word_sentences):
    """Yield CBOW training samples, each extended with the centre word's score.

    For every sentence, in-vocabulary words are randomly down-sampled, then
    each surviving word is treated as the centre of a randomly shrunk context
    window.  The centre word and the indices of its context words are handed
    to ``train_cbow_pair``; every non-``None`` item it yields is re-emitted
    with the centre word's score appended.

    Args:
        model: Object exposing ``vocab`` (mapping from word to an entry with
            ``sample_int`` and ``index`` attributes), ``window`` (maximum
            context radius) and ``random`` (providing ``rand()`` and
            ``randint(n)``).
        scored_word_sentences: Iterable of sentences, each an iterable of
            ``(word, score)`` pairs.

    Yields:
        ``[xy[0], xy[1], xy[2], [score]]`` where ``xy`` is an item produced by
        ``train_cbow_pair`` and ``score`` belongs to the centre word.
    """
    for scored_word_sentence in scored_word_sentences:
        vocab = model.vocab

        # Down-sample: keep an in-vocabulary word only when its `sample_int`
        # exceeds a random draw scaled by 2**32.  One draw is consumed per
        # in-vocabulary word, in sentence order.
        # NOTE(review): presumably `rand()` is uniform on [0, 1) -- confirm.
        scored_vocabs = []
        for word, score in scored_word_sentence:
            if word not in vocab:
                continue
            entry = vocab[word]
            if entry.sample_int > model.random.rand() * 2**32:
                scored_vocabs.append((entry, score))

        for pos, (centre, centre_score) in enumerate(scored_vocabs):
            # `b` in the original word2vec code
            reduced_window = model.random.randint(model.window)
            start = max(0, pos - model.window + reduced_window)
            stop = pos + model.window + 1 - reduced_window

            # Indices of the context words: everything in the shrunk window
            # except the centre position itself.
            word2_indices = [
                neighbour.index
                for pos2, (neighbour, _) in enumerate(
                    scored_vocabs[start:stop], start
                )
                if neighbour is not None and pos2 != pos
            ]

            for xy in train_cbow_pair(model, centre, word2_indices, None, None):
                # Identity test, not `!=`: the latter is ambiguous if an
                # array-like object is ever yielded.
                if xy is not None:
                    yield [xy[0], xy[1], xy[2], [centre_score]]