def __init__(self, codes, separator='@@'):
    """Build the merge-pair lookup table and set up per-instance caching.

    Args:
        codes: Iterable of strings. Each string is split on whitespace
            and the resulting tuple becomes one key of ``self.bpe_codes``.
        separator: Stored unchanged as ``self.separator``; how it is
            used is not visible from this method (presumably the marker
            inserted between sub-word units -- confirm against
            ``encode``).
    """
    # Shadow ``encode`` on this instance with a memoised wrapper around
    # the bound method, so every instance gets its own bounded cache.
    # NOTE(review): arguments passed to ``encode`` must be hashable for
    # the cache to work.
    self.encode = functools.lru_cache(maxsize=65536)(self.encode)

    pairs = [tuple(item.split()) for item in codes]

    # Map each pair to its position in ``codes``. Walking the list
    # backwards means that for a duplicated pair the *earliest* index is
    # written last and therefore wins (only the first instance counts).
    self.bpe_codes = {
        pair: index for index, pair in reversed(list(enumerate(pairs)))
    }

    self.separator = separator
评论列表
文章目录