def CVsplitter(taskcolumn, K):
    '''
    Yield K cross-validation splits that keep each task in a single fold.

    The distinct values of `taskcolumn` are sorted and dealt round-robin
    into K groups, so group sizes differ by at most one task. For each
    group, rows belonging to its tasks form the test set and all other
    rows form the training set.

    Parameters:
        taskcolumn: 1-D sequence (list, numpy array, ...) holding the task
            id of every row. Ids must be hashable and mutually sortable.
        K: number of folds; must be at least 1.

    Yields:
        (train_mask, test_mask): two boolean numpy arrays with one entry
        per row of `taskcolumn`. When K exceeds the number of distinct
        tasks, the surplus folds have an all-False test mask.

    Raises:
        ValueError: if K is smaller than 1 (raised when iteration starts,
            since this is a generator).
    '''
    if K < 1:
        raise ValueError('K must be a positive integer, got %r' % (K,))

    # Work on an array so that `column == taskid` is elementwise even when
    # the caller passes a plain Python list.
    column = np.asarray(taskcolumn)
    tasks = sorted(set(taskcolumn))

    for fold in range(K):
        # tasks[fold::K] selects exactly the tasks whose sorted index i
        # satisfies i % K == fold, i.e. the round-robin assignment.
        fold_tasks = tasks[fold::K]
        # Single pre-formatted argument: prints identically whether
        # `print` is the Python 2 statement or the Python 3 function.
        print('fold: %s testtasks: %s' % (fold, fold_tasks))

        # Start from an all-False mask so an empty fold still yields
        # arrays of the right shape rather than scalar booleans.
        test = np.zeros(column.shape, dtype=bool)
        for taskid in fold_tasks:
            test |= (column == taskid)
        yield (np.logical_not(test), test)
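# Comment list  (navigation label left over from the source web page)
# Table of contents  (navigation label left over from the source web page)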