__init__.py 文件源码

python
阅读 17 收藏 0 点赞 0 评论 0

项目:spacy-dev-resources 作者: explosion 项目源码 文件源码
def build_vocab(language, corpus_files_root):
    """Drive the vocabulary build steps for ``language`` in sequence.

    Creates the per-language corpus and model directories, then hands the
    derived paths to the helper steps in this order: ``merge_corpus``,
    ``word_counts``, ``word2vec``, ``brown_clusters`` and ``init_vocab``.

    Args:
        language: Substituted for ``lang`` in the ``CORPUS_DIR`` and
            ``MODEL_DIR`` templates and used as the prefix of every
            ``<language>_wiki.*`` file name.
        corpus_files_root: Passed as-is to ``merge_corpus`` and, with
            ``"/*"`` appended, to ``word_counts``. Presumably a directory
            of corpus files -- confirm against those helpers.

    Returns:
        None. All results are whatever the helper steps produce at the
        paths they are given.
    """
    # Per-language corpus directory; created through the shell via `local`.
    corpus_root = CORPUS_DIR.format(lang=language)
    mkdir_corpus_cmd = "mkdir -p {}".format(corpus_root)
    local(mkdir_corpus_cmd)

    # Per-language model directory; every later artifact except the merged
    # corpus is placed under it.
    model_root = MODEL_DIR.format(lang=language)
    mkdir_model_cmd = "mkdir -p {}".format(model_root)
    local(mkdir_model_cmd)

    # Step 1: merge the input corpus files into one corpus file.
    merged_name = "{}_wiki.corpus".format(language)
    merged_corpus = join(corpus_root, merged_name)
    merge_corpus(corpus_files_root, merged_corpus)

    # Step 2: word frequencies. Note that this step is given a "/*" pattern
    # over the input root rather than the merged corpus file.
    freqs_name = "{}_wiki.freqs".format(language)
    freqs_path = join(model_root, freqs_name)
    input_pattern = corpus_files_root + "/*"
    word_counts(input_pattern, freqs_path)

    # Step 3: word2vec model computed from the merged corpus.
    vectors_name = "{}_wiki.word2vec".format(language)
    vectors_path = join(model_root, vectors_name)
    word2vec(merged_corpus, vectors_path)

    # Step 4: Brown clusters computed from the merged corpus.
    clusters_dir = join(model_root, "brown")
    brown_clusters(merged_corpus, clusters_dir)

    # Step 5: initialise the vocab from the artifacts produced above.
    init_vocab(language, model_root, freqs_path, vectors_path, clusters_dir)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号