def __init__(self, inputs, labels, test_indices=None, **kwargs):
    """Encapsulate all pieces of data needed to run an experiment.

    This is basically a bag of items that makes it easy to serialize and
    deserialize everything as a unit.

    Args:
        inputs: The raw model inputs. This can be set to None if you don't
            want to serialize this value when you save the dataset.
        labels: The raw output labels, indexable by position. When the first
            entry is a set, list or tuple the dataset is treated as
            multi-label and encoded with `MultiLabelBinarizer`; otherwise
            `LabelBinarizer` is used.
        test_indices: The optional test indices to use. Ideally, this should
            be generated one time and reused across experiments to make
            results comparable. `generate_test_indices` can be used to
            generate the indices the first time.
        **kwargs: Additional key/value items; each one is stored as an
            attribute of the same name.
    """
    self.X = np.array(inputs)
    try:
        self.y = np.array(labels)
    except ValueError:
        # Multi-label samples commonly carry different numbers of labels.
        # NumPy >= 1.24 raises ValueError for such ragged input instead of
        # silently producing an object array, so build that 1-D object array
        # (one label collection per sample) explicitly.
        per_sample = np.empty(len(labels), dtype=object)
        for position, sample_labels in enumerate(labels):
            per_sample[position] = sample_labels
        self.y = per_sample

    # Extra items are attached before the bookkeeping attributes below, so a
    # kwarg that reuses one of those names is overwritten.
    for key, value in kwargs.items():
        setattr(self, key, value)

    self._test_indices = None
    self._train_indices = None
    # NOTE(review): presumably `test_indices` is a property whose setter is
    # defined elsewhere in the class and fills in the two private fields
    # above -- confirm against the rest of the class.
    self.test_indices = test_indices

    # The kind of the first label decides the encoder for the whole dataset.
    self.is_multi_label = isinstance(labels[0], (set, list, tuple))
    self.label_encoder = MultiLabelBinarizer() if self.is_multi_label else LabelBinarizer()
    # NOTE(review): flatten() collapses the encoder output to 1-D. For
    # multi-label or multi-class targets the encoders presumably return one
    # indicator column per class, in which case the per-sample rows get
    # concatenated -- confirm that downstream code expects this.
    self.y = self.label_encoder.fit_transform(self.y).flatten()
# Comment list
# Table of contents