create_kanji_dataset.py 文件源码

python
阅读 29 收藏 0 点赞 0 评论 0

项目:MachineLearning 作者: timomernick 项目源码 文件源码
def rasterize_all_kanji():
    """Rasterize every kanji listed in ``kanji.csv`` and build the dataset arrays.

    The CSV is read as tab-separated with no header row; column 1 supplies the
    kanji strings and missing entries are dropped. For each kanji and each
    font weight, one base image is produced by ``rasterize_kanji`` and then
    ``num_augmentations`` further images are derived from it with
    ``augment_kanji``.

    Returns:
        A tuple ``(images, kanjis)``:
        ``images`` is a float array of shape
        ``(num_kanji * num_weights * (num_augmentations + 1),
        kanji_height, kanji_width)``;
        ``kanjis`` is a ``uint32`` array of the same length holding, for each
        image, the index of its kanji within the NaN-filtered column.
    """
    table = pd.read_csv("kanji.csv", sep="\t", header=None)
    kanji_strings = table[1].dropna().values
    num_kanji = kanji_strings.size
    print("Kanji: " + str(num_kanji))

    # Only the "normal" weight is rendered; add more names here to extend it.
    weights = ["normal"]

    # Each (kanji, weight) pair yields one base image plus its augmentations.
    images_per_render = num_augmentations + 1
    total_images = num_kanji * len(weights) * images_per_render

    images = np.zeros([total_images, kanji_height, kanji_width])
    kanjis = np.zeros([total_images], dtype=np.uint32)

    cursor = 0  # next free slot in both output arrays
    for kanji_idx, kanji in enumerate(kanji_strings):
        print("Kanji " + str(kanji_idx))

        for weight in weights:
            png_path = "images/" + str(kanji_idx).zfill(5) + "_" + weight + ".png"
            base_image = rasterize_kanji(kanji, weight, png_path)
            images[cursor] = base_image
            kanjis[cursor] = kanji_idx
            cursor += 1

            # Augmented variants share the label of the image they derive from.
            for augmentation_idx in range(num_augmentations):
                images[cursor] = augment_kanji(base_image, augmentation_idx)
                kanjis[cursor] = kanji_idx
                cursor += 1

    return images, kanjis
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号