def load_gnt_file(filename):
    """
    Lazily load characters and images from a given GNT file.

    Each record is a 10-byte header followed by ``width * height`` one-byte
    pixel values, laid out row by row:

    * 4 bytes: little-endian unsigned record length (read, but not needed
      here because the pixel count is derived from width and height)
    * 2 bytes: the character label, encoded as gb2312
    * 2 bytes: little-endian unsigned image width
    * 2 bytes: little-endian unsigned image height

    :param filename: The file path to load.
    :return: A generator yielding one ``(image, character)`` tuple per record.
        ``image`` is whatever ``toimage`` returns for the ``height`` x ``width``
        pixel array (presumably a Pillow image -- confirm against the
        ``toimage`` in use); ``character`` is the decoded label.
    :raises struct.error: If the file ends in the middle of a record.
    """
    # Thanks to nhatch for the code to read the GNT file, available at https://github.com/nhatch/casia
    # "<" disables alignment padding, so this layout is exactly 10 bytes.
    header = struct.Struct("<I2sHH")
    with open(filename, "rb") as f:
        while True:
            raw_header = f.read(header.size)
            if not raw_header:
                # Clean end of file: no more records.
                break
            if len(raw_header) != header.size:
                raise struct.error(
                    "truncated GNT record header in {!r}: expected {} bytes, got {}".format(
                        filename, header.size, len(raw_header)
                    )
                )
            _record_length, raw_label, width, height = header.unpack(raw_header)

            pixel_count = height * width
            raw_pixels = f.read(pixel_count)
            if len(raw_pixels) != pixel_count:
                raise struct.error(
                    "truncated GNT image data in {!r}: expected {} bytes, got {}".format(
                        filename, pixel_count, len(raw_pixels)
                    )
                )

            # The label is two raw bytes encoded as gb2312; convert to unicode.
            label = decode(raw_label, encoding="gb2312")
            # View the bytes as pixels without a per-byte Python round trip, then
            # widen to the default integer dtype -- the same dtype that building
            # the array from a tuple of Python ints produced -- so ``toimage``
            # receives identical input.
            pixels = np.frombuffer(raw_pixels, dtype=np.uint8).astype(int)
            image = toimage(pixels.reshape(height, width))
            yield image, label
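# Comment list (评论列表) -- web-page navigation text, not part of the code.
# Table of contents (文章目录) -- web-page navigation text, not part of the code.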