def generate_folds(labels_fname, folds_fname, max_n_folds=10):
    """
    Generate folds for CV experiments with n = 2, ..., max_n_folds.

    For every n in that range the data is split with ``GroupKFold``, using
    the filename of each instance as its group, and the resulting splits are
    stored in a dict under key n. The dict is then pickled to ``folds_fname``.

    Parameters
    ----------
    labels_fname
        Passed to ``read_labels``; the returned object must provide a
        ``'__filenames__'`` entry (presumably one filename per instance --
        verify against ``read_labels``).
    folds_fname
        Path of the pickle file to write. It is opened in binary write mode,
        so an existing file is overwritten.
    max_n_folds
        Largest number of folds to generate (inclusive). With a value below 2
        the loop does not run and an empty dict is pickled.

    Returns
    -------
    None
        The folds are only written to disk.
    """
    filenames = read_labels(labels_fname)['__filenames__']
    folds = {}

    for n in range(2, max_n_folds + 1):
        # Create folds from complete texts only
        # (i.e. instances/sentences of the same text are never in different
        # folds). There is no random seed, because the partitioning algorithm
        # is deterministic.
        group_k_fold = GroupKFold(n_splits=n)
        # Don't bother to pass real X and Y, because they are not really
        # used; only the groups argument (third position) drives the split.
        # list() materialises the generator so the splits can be pickled.
        folds[n] = list(group_k_fold.split(filenames, filenames, filenames))

    print('writing folds to ' + folds_fname)
    # Use a context manager so the file is flushed and closed even if
    # pickling fails part-way; a bare open() would leak the handle.
    with open(folds_fname, 'wb') as folds_file:
        pickle.dump(folds, folds_file)
评论列表
文章目录