def __init__(self, codes, separator='@@'):
    """Read the BPE codes file and build the code -> index lookup table.

    Args:
        codes: object exposing a ``name`` attribute that holds the path of
            the codes file. Only ``codes.name`` is used; the file is
            re-opened here so that it is decoded as UTF-8.
        separator: string stored unchanged on ``self.separator``
            (default ``'@@'``).

    After construction ``self.bpe_codes`` maps each line of the file,
    split on whitespace into a tuple, to the zero-based index of the
    first line on which that tuple occurs.
    """
    # Bind the re-opened handle to its own name instead of shadowing the
    # ``codes`` parameter.
    with codecs.open(codes.name, encoding='utf-8') as codes_file:
        entries = [tuple(line.split()) for line in codes_file]

    # Duplicates: only the first occurrence counts, so a later repeat must
    # not overwrite the index recorded earlier -- setdefault keeps it.
    self.bpe_codes = {}
    for index, entry in enumerate(entries):
        self.bpe_codes.setdefault(entry, index)

    self.separator = separator