def _load_g2p_map(self, code):
    """Load the grapheme-to-phoneme mapping table for a language/script.

    The table is read from ``data/map/<code>.csv`` relative to this package.
    The first row is skipped as a header; every following row must hold
    exactly two fields (grapheme, phoneme), both of which are NFC-normalized
    before being stored.

    Args:
        code (str): ISO 639-3 code plus "-" plus ISO 15924 code for the
            language/script to be loaded

    Returns:
        defaultdict: maps each grapheme string to the list of phoneme strings
            found for it in the file

    Raises:
        DatafileError: if the map file cannot be located or opened, or if a
            data row does not contain exactly two fields
        MappingError: if ``_one_to_many_gr_by_line_map`` reports a one-to-many
            mapping for some grapheme
    """
    missing_map_msg = 'Add an appropriately-named mapping to the data/map directory.'
    g2p = defaultdict(list)
    gr_by_line = defaultdict(list)
    try:
        path = os.path.join('data', 'map', code + '.csv')
        path = pkg_resources.resource_filename(__name__, path)
    except IndexError:
        # Retained from the original for backward compatibility.
        raise DatafileError(missing_map_msg)
    # Resolving the resource path does not check that the file exists, so a
    # missing mapping only shows up when the file is opened; report it as a
    # DatafileError rather than leaking a bare IOError.
    try:
        f = open(path, 'rb')
    except IOError:
        raise DatafileError(missing_map_msg)
    with f:
        # NOTE(review): `csv` must accept an `encoding` keyword here (the
        # stdlib reader does not) -- presumably unicodecsv; confirm against
        # the file's imports.
        reader = csv.reader(f, encoding='utf-8')
        next(reader)  # skip the header row
        for (i, fields) in enumerate(reader):
            try:
                graph, phon = fields
            except ValueError:
                # i is 0-based over data rows and the header occupies line 1,
                # hence the +2 to get a 1-based file line number.
                raise DatafileError('Map file is not well formed at line {}.'.format(i + 2))
            graph = unicodedata.normalize('NFC', graph)
            phon = unicodedata.normalize('NFC', phon)
            g2p[graph].append(phon)
            gr_by_line[graph].append(i)
    if self._one_to_many_gr_by_line_map(g2p):
        graph, lines = self._one_to_many_gr_by_line_map(gr_by_line)
        # Convert 0-based data-row indices to 1-based file line numbers.
        lines = [row_index + 2 for row_index in lines]
        raise MappingError('One-to-many G2P mapping for "{}" on lines {}'.format(graph, ', '.join(map(str, lines))).encode('utf-8'))
    return g2p
评论列表
文章目录