def load_from_corpus(cls, reader, remake=False, src_or_tgt="src"):
    """Return the vocabulary for ``reader``, reusing a cached file if allowed.

    The cache path is ``reader.fname + ".vocab-" + reader.mode + "-" +
    src_or_tgt``. When ``remake`` is false and that file exists, the result
    of ``Vocab.load`` on it is returned. Otherwise a fresh ``Vocab`` is
    filled by iterating ``reader``, written to the cache path and returned.

    Args:
        reader: Iterable corpus reader exposing ``fname``, ``mode``,
            ``begin`` and ``end``. Every element of every yielded item is
            passed to ``Vocab.add`` (presumably each item is one sentence's
            token list -- confirm against the reader class).
        remake: When true, skip the cache lookup and rebuild the vocabulary.
        src_or_tgt: Label used in the cache file name and the status message.

    Returns:
        The loaded or newly built ``Vocab`` instance.

    NOTE(review): ``cls`` is not used here; ``Vocab`` is referenced directly.
    """
    cache_path = reader.fname + ".vocab-" + reader.mode + "-" + src_or_tgt

    # Fast path: a previously saved vocabulary exists and may be reused.
    if not remake and os.path.isfile(cache_path):
        return Vocab.load(cache_path)

    vocab = Vocab()
    # ``seen`` is the running number of items (sentences) read so far.
    for seen, tokens in enumerate(reader, 1):
        for tok in tokens:
            vocab.add(tok)
        # Progress marker on the same output line every 10000 items.
        if seen % 10000 == 0:
            print("...", seen, end="")

    print("\nSaving " + src_or_tgt + " vocab of size", vocab.size)

    # Record the reader's begin/end markers via vocabulary lookup, or None
    # when the reader does not define them.
    if reader.begin is not None:
        vocab.START_TOK = vocab[reader.begin]
    else:
        vocab.START_TOK = None
    if reader.end is not None:
        vocab.END_TOK = vocab[reader.end]
    else:
        vocab.END_TOK = None

    vocab.save(cache_path)
    return vocab
#### reader class
# (web-page residue from extraction: "Comment list" / "Table of contents")