tools.py 文件源码-python代码片段

tools.py 文件源码

python

阅读 28 收藏 0 点赞 0 评论 0

项目：ConversationalQA 作者: btjhjeon 项目源码文件源码

def train_regressor(options, embed_map, wordvecs, worddict):
    """
    Return regressor to map word2vec to RNN word space
    """
    # Gather all words from word2vec that appear in wordvecs
    d = defaultdict(lambda : 0)
    for w in embed_map.vocab.keys():
        d[w] = 1
    shared = OrderedDict()
    count = 0
    for w in worddict.keys()[:options['n_words']-2]:
        if d[w] > 0:
            shared[w] = count
            count += 1

    # Get the vectors for all words in 'shared'
    w2v = numpy.zeros((len(shared), 300), dtype='float32')
    sg = numpy.zeros((len(shared), options['dim_word']), dtype='float32')
    for w in shared.keys():
        w2v[shared[w]] = embed_map[w]
        sg[shared[w]] = wordvecs[w]

    clf = LinearRegression()
    clf.fit(w2v, sg)
    return clf