util.py 文件源码

python
阅读 40 收藏 0 点赞 0 评论 0

项目:lang-reps 作者: chaitanyamalaviya 项目源码 文件源码
def load_from_corpus(cls, reader, remake=False, src_or_tgt="src"):
        vocab_fname = reader.fname+".vocab-"+reader.mode+"-"+src_or_tgt
        if not remake and os.path.isfile(vocab_fname):
            return Vocab.load(vocab_fname)
        else:
            v = Vocab()
            count = 0  # count of sentences
            for item in reader:
                toklist = item
                for token in toklist:
                    v.add(token)
                count += 1
                if count % 10000 == 0:
                    print("...", count, end="")
            print("\nSaving " + src_or_tgt + " vocab of size", v.size)
            v.START_TOK = v[reader.begin] if reader.begin is not None else None
            v.END_TOK = v[reader.end] if reader.end is not None else None
            v.save(vocab_fname)
            return v


#### reader class
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号