def train_test_split(inpath, train, test, split, random_seed):
    """
    Randomly split the entries of ``inpath`` into train and test folders.

    RuCor doesn't provide a train/test data split, so a single random split
    is made here and every entry of ``inpath`` is moved to its destination.

    Args:
        inpath: path to the folder with the data; its entries are moved out
        train: path to train folder
        test: path to test folder
        split: passed to ShuffleSplit as ``test_size`` (a float is read as
            the share of entries put into test, an int as their absolute
            number)
        random_seed: passed to ShuffleSplit as ``random_state``

    Returns:
        None
    """
    print('Start train-test splitting ...')
    # os.listdir returns entries in arbitrary order; sort them so that the
    # same random_seed always produces the same split.
    entries = sorted(os.listdir(inpath))
    doc_split = ShuffleSplit(1, test_size=split, random_state=random_seed)
    # Exactly one split is configured, so take it directly instead of looping.
    train_indices, test_indices = next(doc_split.split(entries))
    train_set = [entries[i] for i in sorted(train_indices)]
    test_set = [entries[i] for i in sorted(test_indices)]
    for name in train_set:
        build_data.move(os.path.join(inpath, name), os.path.join(train, name))
    for name in test_set:
        build_data.move(os.path.join(inpath, name), os.path.join(test, name))
    print('End train-test splitts.')
评论列表
文章目录