def stratified_folds(self, nfolds=5, sortby=None):
    """Assign every row of ``self.df`` a fold number based on one column.

    The distinct values of the ``sortby`` column are visited in ascending
    order and handed fold numbers 1, 2, ..., ``nfolds``, 1, 2, ... in turn.
    All rows sharing a value therefore land in the same fold. The result is
    written to the ``('meta', 'Folds')`` column, which is created (or reset
    to NaN) on every call.

    Side effects: ``self.df`` is modified in place and is left sorted by its
    index; ``self.folds_hist(sortby, 50)`` is called at the end.

    Args:
        nfolds: Number of folds to cycle through.
        sortby: Label of the column whose values drive the assignment.
            NOTE(review): the default ``None`` is passed straight to
            ``sort_values`` and is not expected to be a usable column
            label -- callers should always supply one.
    """
    fold_col = ('meta', 'Folds')

    # Start from a clean column so that rows which match no unique value
    # (e.g. NaN entries, which never compare equal) stay NaN.
    self.df[fold_col] = np.nan

    # Sort the data frame by the column of interest.
    self.df.sort_values(by=sortby, inplace=True)

    # The stratification column is not modified below, so look it up once.
    values = self.df[sortby]
    uniqvals = np.unique(values)  # sorted unique values

    # Assign folds by stepping through the unique values.
    fold_num = 1
    for val in uniqvals:
        # Boolean-mask assignment replaces DataFrame.set_value, which was
        # removed in pandas 1.0.
        self.df.loc[values == val, fold_col] = fold_num
        # Increment the fold number; wrap back to 1 once it exceeds the
        # desired number of folds.
        fold_num += 1
        if fold_num > nfolds:
            fold_num = 1

    # Sort by index to return the df to index order.
    self.df.sort_index(inplace=True)
    # presumably draws a histogram of the folds with 50 bins -- folds_hist
    # is defined elsewhere; confirm there.
    self.folds_hist(sortby, 50)
# (Web-page navigation residue, not code: "Comment list" / "Table of contents".)