def word2vec(corpus_path, out_path, dim=150, threads=4, min_count=10, cbow=0):
    """Train word vectors with gensim's standalone script, then bzip2 the output.

    Runs up to three shell commands through ``local``:

    1. ``mkdir -p`` on the directory part of ``out_path`` (skipped when
       ``out_path`` has no directory component).
    2. ``python -m gensim.scripts.word2vec_standalone`` with the options below.
    3. ``bzip2`` on ``out_path``.

    Args:
        corpus_path: Value passed to the script's ``-train`` option.
        out_path: Value passed to the script's ``-output`` option; the same
            path is afterwards handed to ``bzip2``.
        dim: Value passed as ``-size``.
        threads: Value passed as ``-threads``.
        min_count: Value passed as ``-min_count``.
        cbow: Value passed as ``-cbow``.

    Note:
        Paths are interpolated into the shell command lines unquoted, so
        paths containing whitespace or shell metacharacters are not supported.
    """
    # dirname() returns "" for a bare file name; "mkdir -p" with no operand
    # exits non-zero, which would fail the run before training even starts.
    out_dir = dirname(out_path)
    if out_dir:
        local("mkdir -p {}".format(out_dir))

    train_cmd = (
        "python -m gensim.scripts.word2vec_standalone "
        "-train {corpus_file} -output {file} -size {dim} "
        "-threads {threads} -min_count {min} -cbow {cbow}"
    ).format(
        corpus_file=corpus_path,
        file=out_path,
        dim=dim,
        threads=threads,
        min=min_count,
        cbow=cbow,
    )
    local(train_cmd)

    # capture=True makes local() capture bzip2's output rather than print it.
    local("bzip2 {}".format(out_path), capture=True)

    # NOTE: a previously disabled alternative invoked
    # "python training/word_vectors.py {lang} {in_dir} {out_file} -n {threads} -d {dim}";
    # it needs a language value that this function does not receive.
评论列表
文章目录