makelowdim.py 文件源码

python
阅读 25 收藏 0 点赞 0 评论 0

项目:histwords 作者: williamleif 项目源码 文件源码
def worker(proc_num, queue, out_dir, in_dir, count_dir, words, dim, num_words, min_count=100):
    """Drain ``queue`` of years and write a truncated SVD of each year's embedding.

    For every year taken from the queue the function:

    1. collects the words returned by ``words_above_count(count_dir, year,
       min_count)``,
    2. intersects them with the first ``num_words`` entries of ``words[year]``,
    3. loads the explicit embedding at ``(in_dir + INPUT_FORMAT)`` for that
       year (unnormalized) and restricts it to those words
       (``restrict_context=True``),
    4. runs ``randomized_svd`` with ``n_components=dim`` and ``n_iter=5``,
    5. saves ``u``, ``v`` and ``s`` as ``.npy`` files and pickles
       ``base_embed.iw`` under the ``(out_dir + OUT_FORMAT)`` prefix.

    Args:
        proc_num: Worker identifier; not used inside this function.
        queue: Queue of years to process; the worker returns once it is empty.
        out_dir: Prefix prepended to ``OUT_FORMAT`` for output files.
        in_dir: Prefix prepended to ``INPUT_FORMAT`` for input embeddings.
        count_dir: Location passed through to ``words_above_count``.
        words: Indexable by year; each entry is a sliceable sequence of words.
        dim: Number of SVD components to keep.
        num_words: How many leading entries of ``words[year]`` to consider.
        min_count: Threshold passed through to ``words_above_count``.
    """
    # Function-scope import keeps this block self-contained; the module is
    # named ``Queue`` on Python 2 and ``queue`` on Python 3.
    try:
        from queue import Empty
    except ImportError:
        from Queue import Empty

    while True:
        # A separate ``empty()`` check followed by a blocking ``get()`` can
        # hang forever when another worker takes the last item in between,
        # so fetch without blocking and stop when nothing is left.
        try:
            year = queue.get_nowait()
        except Empty:
            break
        print("Loading embeddings for year {}".format(year))
        # Random delay of up to two minutes before touching the data
        # (presumably to stagger concurrent workers -- confirm with caller).
        time.sleep(random.random() * 120)
        valid_words = set(words_above_count(count_dir, year, min_count))
        print(len(valid_words))
        # Use a separate local name: rebinding the ``words`` parameter would
        # make ``words[year]`` index the filtered list on the next iteration.
        year_words = list(valid_words.intersection(words[year][:num_words]))
        print(len(year_words))
        base_embed = Explicit.load((in_dir + INPUT_FORMAT).format(year=year), normalize=False)
        base_embed = base_embed.get_subembed(year_words, restrict_context=True)
        print("SVD for year {}".format(year))
        u, s, v = randomized_svd(base_embed.m, n_components=dim, n_iter=5)
        print("Saving year {}".format(year))
        # All four output files share the same formatted path prefix.
        out_prefix = (out_dir + OUT_FORMAT).format(year=year, dim=dim)
        np.save(out_prefix + "-u.npy", u)
        np.save(out_prefix + "-v.npy", v)
        np.save(out_prefix + "-s.npy", s)
        write_pickle(base_embed.iw, out_prefix + "-vocab.pkl")
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号