def attribute_category(out, ratios):
    """Assign every subject to either the 'train' or the 'test' category.

    A random subset of the distinct subjects is drawn (without replacement)
    to form the training set; all remaining subjects go to the test set.
    Every row belonging to a given subject receives the same label,
    regardless of how the rows are ordered in ``out``.

    Args:
        out (pd.DataFrame): one row per file, with a 'subject' column
            identifying the subject each file belongs to. The 'category'
            column is (over)written by this function.
        ratios (list): train/test proportions; only ``ratios[0]`` (the train
            share) is read here. Expected to have been validated by the
            caller (entries summing to 1).

    Returns:
        pd.DataFrame: the same ``out`` object, modified in place, whose
        'category' column holds 'train' or 'test' for every row.

    Raises:
        ValueError: raised by ``np.random.choice`` if ``ratios[0]`` asks for
            more train subjects than there are distinct subjects.
    """
    subjects = out["subject"].unique()
    n_subjects = len(subjects)
    # int() truncates, so the train share is rounded down to whole subjects.
    n_train = int(ratios[0] * n_subjects)

    # replace=False is essential: sampling with replacement can pick the same
    # subject several times and silently shrink the training set.
    train_idx = np.random.choice(n_subjects, size=n_train, replace=False)
    train_subjects = subjects[train_idx]

    # Label each row from its own subject value instead of relying on the
    # rows being sorted/grouped by subject or on existing 'category' values.
    is_train = out["subject"].isin(train_subjects)
    out["category"] = np.where(is_train, "train", "test")
    return out
评论列表
文章目录