def main():
    """Command-line entry point for the dataset generator.

    Reads three positional paths (``source``, ``train``, ``test``) and an
    optional ``-p/--possibility`` float (default 0.9) from the command
    line, opens all three paths with ``lzma.open`` and passes the open
    streams to ``separate``.

    The source stream is opened in text mode (``'rt'``); both output
    streams are opened in binary mode (``'wb'``).
    NOTE(review): ``separate`` is defined outside this block -- presumably
    it uses the possibility value to decide which output each record goes
    to and writes bytes to the outputs; confirm against its definition.
    """
    cli = argparse.ArgumentParser(description='dataset generator')
    cli.add_argument(
        '-p', '--possibility',
        default=0.9,
        type=float,
        help='possibility to add train dataset',
    )

    # Positional arguments, registered in the order they appear on the
    # command line.
    positionals = (
        ('source', 'path to mecab-processed corpus (xz compressed)'),
        ('train', 'path for writing training dataset (xz compressed)'),
        ('test', 'path for writing testing dataset (xz compressed)'),
    )
    for dest, description in positionals:
        cli.add_argument(dest, help=description)

    options = cli.parse_args()

    # Nested context managers: streams are opened source -> train -> test
    # and closed in the reverse order, even if separate() raises.
    with lzma.open(options.source, 'rt') as source:
        with lzma.open(options.train, 'wb') as train:
            with lzma.open(options.test, 'wb') as test:
                separate(source, options.possibility, train, test)
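# Comment list (leftover web-page navigation text, not part of the program)
# Table of contents (leftover web-page navigation text, not part of the program)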