def split_dataset(dataframe, training_ratio=.8, do_segment_split=True, shuffle=False, random_state=None):
    """
    Splits the dataset into a training and test partition.

    The partition is taken from a *k*-fold cross validator, so the share of
    data that ends up in the training frame is (*k* - 1)/*k*. This equals
    *training_ratio* only when 1/(1 - *training_ratio*) is a whole number
    (e.g. .5, .75, .8, .9); otherwise *k* is rounded down and the training
    share is smaller than requested.

    :param dataframe: A data frame to split. Should have a 'Preictal' column.
    :param training_ratio: The ratio of the data to use for the first part.
                           Must be less than 1.
    :param do_segment_split: If True, the split will be done on whole segments.
    :param shuffle: If true, the split will shuffle the data before splitting.
    :param random_state: Seed
    :return: A pair of disjoint data frames, where the first frame contains
             approximately *training_ratio* of all the data.
    :raises ValueError: If *training_ratio* is 1 or greater (no data would be
                        left for the test partition).
    """
    # `not x < 1` (rather than `x >= 1`) also rejects NaN.
    if not training_ratio < 1:
        raise ValueError(
            "training_ratio must be less than 1, got {!r}".format(training_ratio))

    # We'll make the splits based on the sklearn cross validators,
    # We calculate the number of folds which correspond to the
    # desired training ratio. If *r* is the training ratio and *k*
    # the number of folds, we'd like *r* = (*k* - 1)/*k*, that is,
    # the ratio should be the same as all the included folds divided
    # by the total number of folds. This gives us *k* = 1/(1-*r*)
    #
    # 1/(1-r) is rounded to 9 decimals before flooring: binary floating point
    # can put the quotient a hair below the intended integer (r=.95 yields
    # 19.99999999999998), and a bare floor would then drop a whole fold.
    exact_folds = 1 / (1 - training_ratio)
    k = int(np.floor(round(exact_folds, 9)))

    if do_segment_split:
        # We use the segment based cross validator to get a stratified split.
        cv = SegmentCrossValidator(dataframe,
                                   n_folds=k,
                                   shuffle=shuffle,
                                   random_state=random_state)
    else:
        # Don't split by segment, but still do a stratified split
        cv = cross_validation.StratifiedKFold(dataframe['Preictal'],
                                              n_folds=k,
                                              shuffle=shuffle,
                                              random_state=random_state)

    # Only one (training, test) pair of positional indices is needed.
    # NOTE(review): assumes `first` returns the first item yielded by the
    # cross validator - confirm against its definition.
    training_indices, test_indices = first(cv)
    return dataframe.iloc[training_indices], dataframe.iloc[test_indices]
# (Web-page residue, not code) Comment list
# (Web-page residue, not code) Table of contents