def lexicon_iterator(path: str,
                     vocab_source: Dict[str, int],
                     vocab_target: Dict[str, int]) -> Generator[Tuple[int, int, float], None, None]:
    """
    Stream a tab-separated translation table as (source id, target id, probability) triples.

    Every line of the file must consist of exactly three tab-separated fields:
    source token, target token and log-probability. Tokens are mapped to ids
    through the given vocabularies; a token missing from its vocabulary is
    mapped to that vocabulary's id for C.UNK_SYMBOL. The log-probability is
    exponentiated before it is yielded.

    :param path: Path to lexicon file (opened with smart_open).
    :param vocab_source: Source vocabulary (token -> id); must contain C.UNK_SYMBOL.
    :param vocab_target: Target vocabulary (token -> id); must contain C.UNK_SYMBOL.
    :return: Generator returning tuples (src_id, trg_id, prob).
    """
    # Both vocabularies need an unknown-word entry to fall back on.
    assert C.UNK_SYMBOL in vocab_source
    unk_source = vocab_source[C.UNK_SYMBOL]
    assert C.UNK_SYMBOL in vocab_target
    unk_target = vocab_target[C.UNK_SYMBOL]

    with smart_open(path) as lexicon_file:
        for entry in lexicon_file:
            # Strip only the newline so that tokens keep any other whitespace.
            fields = entry.rstrip("\n").split("\t")
            source_token, target_token, log_prob = fields
            # The file stores log-probabilities; convert back to probability space.
            probability = np.exp(float(log_prob))
            yield (vocab_source.get(source_token, unk_source),
                   vocab_target.get(target_token, unk_target),
                   probability)
评论列表
文章目录