def make_train_char_db(ori_img_dir):
    """
    Build the training character database: split every training CAPTCHA into
    4 single-character images and save them, with their labels, to an HDF5 file.
    :param ori_img_dir: directory containing the original training images
    :return: None (writes train_chars_data.h5 and the split character images to disk)
    """
    import os
    import cv2
    import numpy as np
    import pandas as pd
    import h5py
    even_split_train_path = os.path.join(os.getcwd(), 'evensplit_train_im')
    if not os.path.exists(even_split_train_path):
        os.makedirs(even_split_train_path)
    train_imgs = os.listdir(ori_img_dir)
    # character classes; a label is the index of the character in this list
    letters = list('02345678abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    # answer file: one 'index:answer' line per training image
    answer_data = pd.read_table(os.path.join(os.getcwd(), 'lvy_ans.txt'), sep=':', names=['Index', 'Answer'])
    # preallocate the arrays: 4 character images (35x35, single channel) per CAPTCHA
    img = np.zeros((len(train_imgs) * 4, 1, 35, 35), dtype=np.uint8)
    label = np.zeros((len(train_imgs) * 4), dtype=np.uint32)
    index = 0
    for train_img in train_imgs:
        ori_train_img = os.path.join(ori_img_dir, train_img)
        binary_train_img = binary_img(ori_train_img)   # binarize the original image
        dingge_train_img = ding_ge(binary_train_img)   # crop to the tight bounding box of the characters
        # split the cropped image into 4 equal-width character slices
        step_train = dingge_train_img.shape[1] / 4.0
        start_train = [int(round(p * step_train)) for p in range(4)]
        for p, k in enumerate(start_train):
            print(train_img + '_' + str(p + 1))
            # integer slice bounds; numpy clips the end index at the image width
            split_train_img = dingge_train_img[:, k:k + int(np.ceil(step_train))]
            small_img = ding_ge(split_train_img)
            split_train_resize_img = cv2.resize(small_img, (35, 35))
            img[index, 0, :, :] = split_train_resize_img
            # the file name (e.g. '12.png') gives the 1-based row in the answer file
            label[index] = letters.index(answer_data['Answer'][int(train_img.split('.')[0]) - 1][p])
            index += 1
            # save the binary (0/1) slice as a visible 0/255 image for inspection
            cv2.imwrite(os.path.join(even_split_train_path,
                                     train_img.split('.')[0] + '_' + str(p + 1) + '.png'),
                        split_train_resize_img * 255)
    # write all character images and labels into a single HDF5 database
    with h5py.File(os.path.join(os.getcwd(), 'train_chars_data.h5'), 'w') as f:
        f.create_dataset('img', data=img)
        f.create_dataset('label', data=label)
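As a quick check, the generated database can be loaded back with h5py. The sketch below is only an illustration: it assumes the file and dataset names produced above, and the 'train_im' directory name is a placeholder for wherever the original training CAPTCHAs actually live.

# usage sketch; 'train_im' is an assumed directory name, not from the original script
import os
import h5py
import numpy as np

make_train_char_db(os.path.join(os.getcwd(), 'train_im'))

with h5py.File(os.path.join(os.getcwd(), 'train_chars_data.h5'), 'r') as f:
    imgs = np.array(f['img'])      # shape (num_captchas * 4, 1, 35, 35), uint8
    labels = np.array(f['label'])  # integer indices into the letters alphabet

print(imgs.shape, labels.shape)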