def trained_models():
    """Fit several classifiers on the training part of the breast cancer data.

    The dataset is split 70/30 with a fixed ``random_state``; only the
    training portion is used here.

    Returns
    -------
    dict
        Fitted estimators keyed by ``'RF'``, ``'LR'``,
        ``'SVC_w_linear_kernel'``, ``'Dummy'`` and ``'SVC_wo_linear_kernel'``.
    """
    bunch = datasets.load_breast_cancer()
    features_train, _, labels_train, _ = train_test_split(
        bunch.data, bunch.target, test_size=0.3, random_state=12345)

    # Estimators are fitted one after another in a fixed order.
    forest = RandomForestClassifier()
    forest.fit(features_train, labels_train)

    logistic = LogisticRegression()
    logistic.fit(features_train, labels_train)

    linear_svc = SVC(kernel='linear')
    linear_svc.fit(features_train, labels_train)

    default_svc = SVC()
    default_svc.fit(features_train, labels_train)

    baseline = DummyClassifier()
    baseline.fit(features_train, labels_train)

    return {
        'RF': forest,
        'LR': logistic,
        'SVC_w_linear_kernel': linear_svc,
        'Dummy': baseline,
        'SVC_wo_linear_kernel': default_svc,
    }
# Example source code for Python's load_breast_cancer()
def test_score_grid_func():
    """Fit every metaheuristic in ``METACLASSES`` and call its grid-search score function."""
    bunch = load_breast_cancer()
    X = bunch['data']
    # Replace the integer targets by their class-name labels.
    y = bunch['target_names'].take(bunch['target'])

    # Classifier shared by all metaheuristics
    clf = SVC()

    settings = {
        'random_state': 0,
        'verbose': True,
        'make_logbook': True,
        'repeat': 1,
        'number_gen': 3,
        'size_pop': 2,
    }

    for metaclass in METACLASSES:
        meta = metaclass(classifier=clf, **settings)
        print("Checking Grid: ", meta._name)

        # Train the selector, then evaluate its scoring hook
        meta.fit(X, y, normalize=True)
        meta.score_func_to_gridsearch(meta)
def trained_models():
    """Return a dict of classifiers fitted on the breast cancer training split.

    The data is split with ``test_size=0.3`` and ``random_state=12345``;
    the held-out part is not used by this function.

    Returns
    -------
    dict
        Maps ``'RF'``, ``'LR'``, ``'SVC_w_linear_kernel'``, ``'Dummy'`` and
        ``'SVC_wo_linear_kernel'`` to the corresponding fitted estimator.
    """
    bunch = datasets.load_breast_cancer()
    X_train, _, y_train, _ = train_test_split(
        bunch.data, bunch.target, test_size=0.3, random_state=12345)

    # Listed in the order in which the estimators are fitted.
    candidates = [
        ('RF', RandomForestClassifier()),
        ('LR', LogisticRegression()),
        ('SVC_w_linear_kernel', SVC(kernel='linear')),
        ('SVC_wo_linear_kernel', SVC()),
        ('Dummy', DummyClassifier()),
    ]

    fitted = {}
    for label, estimator in candidates:
        estimator.fit(X_train, y_train)
        fitted[label] = estimator

    # Key order of the returned mapping.
    key_order = ('RF', 'LR', 'SVC_w_linear_kernel', 'Dummy', 'SVC_wo_linear_kernel')
    return {key: fitted[key] for key in key_order}
def data():
    """Split the breast cancer dataset into train and test parts.

    Uses ``test_size=0.3`` and ``random_state=12345``.

    Returns
    -------
    dict
        The four pieces of the split under the keys ``'X_train'``,
        ``'X_test'``, ``'y_train'`` and ``'y_test'``.
    """
    bunch = datasets.load_breast_cancer()
    pieces = train_test_split(
        bunch.data, bunch.target, test_size=0.3, random_state=12345)
    # train_test_split yields the pieces in exactly this order.
    labels = ('X_train', 'X_test', 'y_train', 'y_test')
    return dict(zip(labels, pieces))
def load_breast_cancer_df(include_tgt=True, tgt_name="target", shuffle=False):
    """Build a dataframe from the breast cancer dataset.

    Columns are named after the dataset's feature names. The target can
    optionally be appended as an extra column.

    Parameters
    ----------
    include_tgt : bool, optional (default=True)
        If true, add the target values as a column.

    tgt_name : str, optional (default="target")
        Column name used for the target when it is included.

    shuffle : bool, optional (default=False)
        If true, the frame is passed through ``shuffle_dataframe``
        before being returned.

    Returns
    -------
    X : pd.DataFrame
        The loaded (and optionally shuffled) dataset.
    """
    bunch = load_breast_cancer()
    frame = pd.DataFrame.from_records(data=bunch.data, columns=bunch.feature_names)

    if include_tgt:
        frame[tgt_name] = bunch.target

    if shuffle:
        return shuffle_dataframe(frame)
    return frame
def setUp(self):
    """Load the breast cancer data and store it in a seeded random row order."""
    np.random.seed(0)
    features, labels = load_breast_cancer(return_X_y=True)
    # One permutation is applied to both arrays so rows stay aligned.
    order = np.random.permutation(len(features))
    self.X = features[order]
    self.y = labels[order]
def setUp(self):
    """Prepare ``self.X`` / ``self.y``: breast cancer data, rows permuted with seed 0."""
    np.random.seed(0)
    raw_X, raw_y = load_breast_cancer(return_X_y=True)
    # The same index array reorders features and targets together.
    shuffled_idx = np.random.permutation(len(raw_X))
    self.X = raw_X[shuffled_idx]
    self.y = raw_y[shuffled_idx]
def setUp(self):
    """Shuffle the breast cancer data and fit three classifiers on it.

    Stores the shuffled data in ``self.X`` / ``self.y``, the estimators in
    ``self.lr``, ``self.rf`` and ``self.svc``, and their outputs on the same
    data in ``self.lr_probas``, ``self.rf_probas`` and ``self.svc_scores``.
    """
    np.random.seed(0)
    features, labels = load_breast_cancer(return_X_y=True)
    order = np.random.permutation(len(features))
    self.X = features[order]
    self.y = labels[order]

    self.lr = LogisticRegression()
    self.rf = RandomForestClassifier(random_state=8)
    self.svc = LinearSVC()

    # Fit order (lr, rf, svc) is kept fixed.
    self.lr.fit(self.X, self.y)
    self.lr_probas = self.lr.predict_proba(self.X)

    self.rf.fit(self.X, self.y)
    self.rf_probas = self.rf.predict_proba(self.X)

    self.svc.fit(self.X, self.y)
    self.svc_scores = self.svc.decision_function(self.X)
def setUp(self):
    """Seed NumPy, load the breast cancer data and keep a row-permuted copy on ``self``."""
    np.random.seed(0)
    data_matrix, targets = load_breast_cancer(return_X_y=True)
    # A single permutation keeps samples and targets paired.
    perm = np.random.permutation(len(data_matrix))
    self.X = data_matrix[perm]
    self.y = targets[perm]
def setUp(self):
    """Set up ``self.X`` and ``self.y`` from the breast cancer data in shuffled order."""
    np.random.seed(0)
    X_all, y_all = load_breast_cancer(return_X_y=True)
    # Seeded above, so the row order is the same on every run.
    row_order = np.random.permutation(len(X_all))
    self.X = X_all[row_order]
    self.y = y_all[row_order]
def get_sample_dataset(dataset_properties):
    """Returns sample dataset

    Args:
        dataset_properties (dict): Dictionary corresponding to the properties of the dataset
            used to verify the estimator and metric generators. Its ``'type'`` entry selects
            the dataset (``'multiclass'``, ``'iris'``, ``'mnist'``, ``'breast_cancer'``,
            ``'boston'`` or ``'diabetes'``). For ``'multiclass'`` the remaining entries are
            forwarded to ``datasets.make_classification``.

    Returns:
        X (array-like): Features array

        y (array-like): Labels array

        splits (iterator): This is an iterator that returns train test splits for
            cross-validation purposes on ``X`` and ``y``.

    Raises:
        exceptions.UserError: If the dataset type is unknown, or if generating the
            ``'multiclass'`` dataset fails for any reason.
    """
    kwargs = dataset_properties.copy()
    data_type = kwargs.pop('type')

    # The splitters are built without ``random_state``: the folds are not shuffled, so a
    # seed has no effect on them, and newer scikit-learn releases raise ValueError when
    # ``random_state`` is set while ``shuffle`` is False.
    if data_type == 'multiclass':
        try:
            X, y = datasets.make_classification(random_state=8, **kwargs)
            splits = model_selection.StratifiedKFold(n_splits=2).split(X, y)
        except Exception as e:
            # Bad user-supplied generation parameters are reported as a user error.
            raise exceptions.UserError(repr(e))
    elif data_type == 'iris':
        X, y = datasets.load_iris(return_X_y=True)
        splits = model_selection.StratifiedKFold(n_splits=2).split(X, y)
    elif data_type == 'mnist':
        # The 'mnist' type is served by the digits dataset.
        X, y = datasets.load_digits(return_X_y=True)
        splits = model_selection.StratifiedKFold(n_splits=2).split(X, y)
    elif data_type == 'breast_cancer':
        X, y = datasets.load_breast_cancer(return_X_y=True)
        splits = model_selection.StratifiedKFold(n_splits=2).split(X, y)
    elif data_type == 'boston':
        # NOTE(review): load_boston is no longer available in recent scikit-learn
        # releases -- confirm the supported version range.
        X, y = datasets.load_boston(return_X_y=True)
        splits = model_selection.KFold(n_splits=2).split(X)
    elif data_type == 'diabetes':
        X, y = datasets.load_diabetes(return_X_y=True)
        splits = model_selection.KFold(n_splits=2).split(X)
    else:
        raise exceptions.UserError('Unknown dataset type {}'.format(dataset_properties['type']))
    return X, y, splits
def test_breast_cancer(self):
    """Mean 10-fold cross-validation score on the breast cancer data must exceed 0.8."""
    bunch = load_breast_cancer()
    tree = DecisionTreeClassifier(tree_type=self.tree_type)
    fold_scores = cross_val_score(tree, bunch.data, bunch.target, cv=10)
    score = np.mean(fold_scores)
    # Report the score before asserting so it is visible even on failure.
    print('breast_cancer: tree_type: {}, score = {}'.format(self.tree_type, score))
    self.assertTrue(score > 0.8)
def test_breast_cancer(self):
    """Assert that the mean of ten cross-validation scores on breast cancer is above 0.8."""
    bunch = load_breast_cancer()
    classifier = DecisionTreeClassifier(tree_type=self.tree_type)
    per_fold = cross_val_score(classifier, bunch.data, bunch.target, cv=10)
    score = np.mean(per_fold)
    self.assertTrue(score > 0.8)
    # Only reached when the assertion above passes.
    print('breast_cancer: tree_type: {}, score = {}'.format(self.tree_type, score))
def test_rocauc_quickmethod(self):
    """
    Run the ``roc_auc`` quick method with a decision tree on the breast cancer data
    """
    bunch = load_breast_cancer()
    # TODO: image comparison of the quick method
    # The returned value is not inspected yet (see the TODO above).
    axes = roc_auc(DecisionTreeClassifier(), bunch.data, bunch.target)
def test_plot():
    """Check plotting for every metaheuristic, and the error raised without a logbook.

    For each class in ``METACLASSES``, ``fit`` followed by ``transform`` must
    give the same result as ``fit_transform`` on a fresh instance, after which
    ``plot_results`` is called. Finally, a ``GeneticAlgorithm`` created with
    ``make_logbook=False`` must raise ``ValueError`` from ``plot_results``.
    """
    bunch = load_breast_cancer()
    X = bunch['data']
    y = bunch['target_names'].take(bunch['target'])

    # Classifier shared by all metaheuristics
    clf = SVC()

    shared = {
        'classifier': clf,
        'random_state': 0,
        'make_logbook': True,
        'repeat': 1,
        'number_gen': 2,
        'size_pop': 2,
    }

    for metaclass in METACLASSES:
        meta = metaclass(verbose=False, **shared)
        print("Checking plotting: ", meta._name)

        # Two-step path: fit, then transform
        meta.fit(X, y, normalize=True)
        X_1 = meta.transform(X)

        # One-step path on a new instance (verbosity left at its default)
        meta = metaclass(**shared)
        X_2 = meta.fit_transform(X=X, y=y, normalize=True)

        assert_array_equal(X_1, X_2)

        # Plot the logged results of this run
        meta.plot_results()

    # Without a logbook, plotting is expected to fail
    ga = GeneticAlgorithm(classifier=clf, random_state=1,
                          make_logbook=False, repeat=1)
    ga.fit(X, y, normalize=True)
    assert_raises(ValueError, ga.plot_results)
def test_parallel():
    """With ``parallel=True``, ``fit`` + ``transform`` must equal ``fit_transform``.

    The comparison is made for every class in ``METACLASSES``, using two
    instances created with the same ``random_state``.
    """
    bunch = load_breast_cancer()
    X = bunch['data']
    y = bunch['target_names'].take(bunch['target'])

    # Classifier shared by all metaheuristics
    clf = SVC()

    shared = {
        'classifier': clf,
        'random_state': 0,
        'make_logbook': False,
        'repeat': 2,
        'number_gen': 2,
        'parallel': True,
        'size_pop': 2,
    }

    for metaclass in METACLASSES:
        meta = metaclass(verbose=True, **shared)
        print("Checking parallel ", meta._name)

        # Two-step path: fit, then transform
        meta.fit(X, y, normalize=True)
        X_1 = meta.transform(X)

        # One-step path on a new instance (verbosity left at its default)
        meta = metaclass(**shared)
        X_2 = meta.fit_transform(X=X, y=y, normalize=True)

        # Both paths must select the same data
        assert_array_equal(X_1, X_2)
def test_unusual_errors():
    """Exercise warning and error paths of the metaheuristics.

    For each class in ``METACLASSES``: an empty ``best_mask_`` must make
    ``transform`` warn and ``safe_mask`` raise ``ValueError``, and calling
    ``score_func_to_gridsearch`` on an unfitted instance must raise
    ``ValueError``. ``BRKGA`` and ``BRKGA2`` must also raise ``ValueError``
    when constructed with ``elite_size=5`` and ``size_pop=2``.
    """
    bunch = load_breast_cancer()
    X = bunch['data']
    y = bunch['target_names'].take(bunch['target'])

    # Classifier shared by all metaheuristics
    clf = SVC()

    shared = {
        'classifier': clf,
        'random_state': 0,
        'verbose': 0,
        'make_logbook': True,
        'repeat': 1,
        'number_gen': 2,
        'size_pop': 2,
    }

    for metaclass in METACLASSES:
        meta = metaclass(**shared)
        print("Checking unusual error: ", meta._name)
        meta.fit(X, y, normalize=True)

        # Simulate a fitted selector whose mask is empty
        meta.best_mask_ = np.array([])
        assert_warns(UserWarning, meta.transform, X)
        assert_raises(ValueError, meta.safe_mask, X, meta.best_mask_)

        # A fresh, unfitted instance cannot be scored
        meta = metaclass(**shared)
        assert_raises(ValueError, meta.score_func_to_gridsearch, meta)

    for metaclass in [BRKGA, BRKGA2]:
        assert_raises(ValueError, metaclass, elite_size=5, **shared)
def test_predict():
    """Fit ``SimulatedAnneling`` on the breast cancer data and call ``predict``."""
    bunch = load_breast_cancer()
    X = bunch['data']
    y = bunch['target_names'].take(bunch['target'])

    # Small population and few generations keep the run short
    annealer = SimulatedAnneling(size_pop=2, number_gen=2)
    annealer.fit(X, y, normalize=True)
    annealer.predict(X)
def data():
    """Return a train/test split of the breast cancer dataset as a dict.

    The split uses ``test_size=0.3`` and ``random_state=12345``.

    Returns
    -------
    dict
        Keys ``'X_train'``, ``'X_test'``, ``'y_train'`` and ``'y_test'``.
    """
    bunch = datasets.load_breast_cancer()
    features_train, features_test, labels_train, labels_test = train_test_split(
        bunch.data, bunch.target, test_size=0.3, random_state=12345)
    return dict(
        X_train=features_train,
        X_test=features_test,
        y_train=labels_train,
        y_test=labels_test,
    )
def test_numerical_split():
    """Check that the splitter's two bags match a manual count around the pivot.

    The number of samples whose chosen feature is ``<= pivot`` must equal the
    size of the first bag; the remainder must equal the size of the second.
    """
    bunch = load_breast_cancer()
    estimator = Id3Estimator()
    estimator.fit(bunch.data, bunch.target)
    splitter = estimator.builder_.splitter

    n_samples = bunch.target.shape[0]
    n_features = bunch.data.shape[1]

    # Let the splitter choose over all samples and all features
    record = splitter.calc(np.arange(n_samples), np.arange(n_features))

    # Manual count on the chosen feature column
    column = bunch.data[:, record.feature_idx]
    less = np.sum(column <= record.pivot)
    more = column.shape[0] - less

    # A fresh index array is passed again for the actual split
    split = splitter.split(np.arange(n_samples), record)
    assert_almost_equal(len(split[0].bag), less)
    assert_almost_equal(len(split[1].bag), more)
def test_fit():
    """The root value of the fitted tree must be 22 for each tested configuration.

    Configurations: defaults, ``max_depth=2`` and ``min_samples_split=20``.
    """
    bunch = load_breast_cancer()
    configurations = (
        {},
        {'max_depth': 2},
        {'min_samples_split': 20},
    )
    for params in configurations:
        estimator = Id3Estimator(**params)
        estimator.fit(bunch.data, bunch.target)
        assert_equal(estimator.tree_.root.value, 22)
def test_gain_ratio():
    """With ``gain_ratio=True`` the root value of the fitted tree must be 23."""
    estimator = Id3Estimator(gain_ratio=True)
    dataset = load_breast_cancer()
    estimator.fit(dataset.data, dataset.target)
    root = estimator.tree_.root
    assert_equal(root.value, 23)
def test_prune():
    """Fit an estimator created with ``prune=True``; the test only checks that fitting runs."""
    estimator = Id3Estimator(prune=True)
    dataset = load_breast_cancer()
    features, targets = dataset.data, dataset.target
    estimator.fit(features, targets)
def test_predict():
    """Predictions must reproduce the training targets and give 0 for one fixed sample."""
    estimator = Id3Estimator()
    dataset = load_breast_cancer()
    estimator.fit(dataset.data, dataset.target)

    # One hand-written observation with 30 feature values
    feature_values = [
        20.57, 17.77, 132.9, 1326, 0.08474, 0.07864, 0.0869,
        0.07017, 0.1812, 0.05667, 0.5435, 0.7339, 3.398, 74.08,
        0.005225, 0.01308, 0.0186, 0.0134, 0.01389, 0.003532,
        24.99, 23.41, 158.8, 1956, 0.1238, 0.1866, 0.2416,
        0.186, 0.275, 0.08902,
    ]
    # Shape it as a single-row 2-D array for predict
    sample = np.array(feature_values).reshape(1, -1)

    training_predictions = estimator.predict(dataset.data)
    assert_almost_equal(training_predictions, dataset.target)

    sample_prediction = estimator.predict(sample)
    assert_almost_equal(sample_prediction, 0)
def test_load_breast_cancer():
    """Check the shape, target count, class-name count and description of the dataset."""
    bunch = load_breast_cancer()
    expected_samples = 569
    expected_features = 30
    assert_equal(bunch.data.shape, (expected_samples, expected_features))
    assert_equal(bunch.target.size, expected_samples)
    assert_equal(bunch.target_names.size, 2)
    # The description must be present and non-empty
    assert_true(bunch.DESCR)