def cli_render(input, output, size):
    '''Render a JSONlines dataset to numpy arrays, saved in an HDF5 file.

    Each non-blank line of ``input`` is parsed as a JSON object that must
    have a ``'target'`` entry (the label) and a ``'strokes'`` entry (a list
    of array-likes). The strokes are converted to numpy arrays and passed
    to ``render`` together with ``size``.

    Three datasets are written to the HDF5 file ``output``:

    * ``'vocab'`` -- the sorted distinct targets, as variable-length strings;
    * ``'x'`` -- the rendered images, float32, shape ``(n, size, size)``;
    * ``'y'`` -- for each sample, the index of its target in ``'vocab'``.

    The file is opened in append mode and the datasets are obtained with
    ``require_dataset``, so datasets that already exist are reused.
    NOTE(review): h5py is expected to raise if an existing dataset has an
    incompatible shape or dtype -- confirm against the h5py version in use.

    Args:
        input: iterable of JSON-encoded lines (e.g. an open text file).
        output: name of the HDF5 file, forwarded to ``h5py.File``.
        size: forwarded to ``render`` and used as both image dimensions
            of the ``'x'`` dataset.
    '''
    chars = []
    images = []
    for line in input:
        # Tolerate blank lines (e.g. a trailing newline at end of file)
        # instead of failing inside json.loads.
        if not line.strip():
            continue
        datum = json.loads(line)
        chars.append(datum['target'])
        images.append(render(
            [np.array(s) for s in datum['strokes']],
            size))

    # Sorting makes the label -> index mapping deterministic.
    vocab = sorted(set(chars))
    char_to_index = {ch: y for y, ch in enumerate(vocab)}

    with h5py.File(output, 'a') as f:
        str_dt = h5py.special_dtype(vlen=str)
        f.require_dataset(
            'vocab', (len(vocab),), dtype=str_dt
        )[...] = vocab
        f.require_dataset(
            'x', shape=(len(images), size, size), dtype=np.float32
        )[...] = np.array(images)
        # `np.int` was removed in NumPy 1.24; it was an alias of the builtin
        # `int`, so passing `int` requests the same dtype as before.
        f.require_dataset(
            'y', shape=(len(chars),), dtype=int
        )[...] = np.array([char_to_index[ch] for ch in chars])
# (Page-scrape residue, not part of the program: "comment list" / "article table of contents".)