def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC()]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test)
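For readers skimming the grid above, here is a single configuration spelled out end to end. This is an illustrative sketch, not part of the original suite; it assumes the older scikit-learn API in which the first BaggingClassifier argument is named base_estimator (renamed to estimator in recent releases).

from sklearn.datasets import load_iris
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
                                                    random_state=0)

# One grid point: half the samples and two features per member, bootstrapped rows.
clf = BaggingClassifier(base_estimator=Perceptron(max_iter=1000, tol=1e-3),
                        max_samples=0.5, max_features=2,
                        bootstrap=True, bootstrap_features=False,
                        random_state=0)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))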
def test_base():
    # Check BaseEnsemble methods.
    ensemble = BaggingClassifier(base_estimator=Perceptron(), n_estimators=3)

    iris = load_iris()
    ensemble.fit(iris.data, iris.target)
    ensemble.estimators_ = []  # empty the list and create estimators manually

    ensemble._make_estimator()
    ensemble._make_estimator()
    ensemble._make_estimator()
    ensemble._make_estimator(append=False)

    assert_equal(3, len(ensemble))
    assert_equal(3, len(ensemble.estimators_))
    assert_true(isinstance(ensemble[0], Perceptron))
def test_nested_circles():
    # Test the linear separability of the first 2D KPCA transform
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)

    # 2D nested circles are not linearly separable
    train_score = Perceptron().fit(X, y).score(X, y)
    assert_less(train_score, 0.8)

    # Project the circles data into the first 2 components of a RBF Kernel
    # PCA model.
    # Note that the gamma value is data dependent. If this test breaks
    # and the gamma value has to be updated, the Kernel PCA example will
    # have to be updated too.
    kpca = KernelPCA(kernel="rbf", n_components=2,
                     fit_inverse_transform=True, gamma=2.)
    X_kpca = kpca.fit_transform(X)

    # The data is perfectly linearly separable in that space
    train_score = Perceptron().fit(X_kpca, y).score(X_kpca, y)
    assert_equal(train_score, 1.0)
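The geometric intuition behind this test: nested circles become linearly separable as soon as the squared radius is available as a feature, which is exactly the kind of nonlinearity the RBF kernel map supplies. A minimal sketch of that intuition (an illustrative addition, not the KPCA projection itself):

import numpy as np
from sklearn.datasets import make_circles
from sklearn.linear_model import Perceptron

X, y = make_circles(n_samples=400, factor=.3, noise=.05, random_state=0)

# Append the squared radius x^2 + y^2 as a third feature.
X_aug = np.hstack([X, (X ** 2).sum(axis=1, keepdims=True)])

# A plain perceptron now separates the circles almost perfectly.
print(Perceptron().fit(X_aug, y).score(X_aug, y))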
def train_classifier(self, trainvectors, labels, alpha='', iterations=50, jobs=10):
    iterations = int(iterations)
    jobs = int(jobs)
    if alpha == '':
        # grid-search over a range of alpha values to find the best setting
        paramsearch = GridSearchCV(estimator=Perceptron(),
                                   param_grid=dict(alpha=numpy.linspace(0, 2, 20)[1:],
                                                   n_iter=[iterations]),
                                   n_jobs=jobs)
        paramsearch.fit(trainvectors, self.label_encoder.transform(labels))
        selected_alpha = paramsearch.best_estimator_.alpha
    elif alpha == 'default':
        selected_alpha = 1.0
    else:
        selected_alpha = alpha
    # train a perceptron with the settings that led to the best performance
    self.model = Perceptron(alpha=selected_alpha, n_iter=iterations, n_jobs=jobs)
    self.model.fit(trainvectors, self.label_encoder.transform(labels))
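A standalone equivalent of the alpha search above, for context. One detail worth noting: in scikit-learn, alpha only takes effect when a penalty is set, so this sketch adds penalty='l2'; n_iter is the pre-0.19 parameter name used throughout this snippet (later renamed max_iter), so it is dropped here. Illustrative only:

import numpy
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron
from sklearn.model_selection import GridSearchCV

X, y = load_iris(return_X_y=True)

# alpha multiplies the regularization term, hence penalty='l2' here.
paramsearch = GridSearchCV(estimator=Perceptron(penalty='l2'),
                           param_grid=dict(alpha=numpy.linspace(0, 2, 20)[1:]),
                           n_jobs=2)
paramsearch.fit(X, y)
print(paramsearch.best_estimator_.alpha)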
def test_basic(self, single_chunk_classification):
    X, y = single_chunk_classification
    a = PartialPerceptron(classes=[0, 1], max_iter=1000, tol=1e-3)
    b = Perceptron(max_iter=1000, tol=1e-3)

    a.fit(X, y)
    b.partial_fit(X, y, classes=[0, 1])
    assert_estimator_equal(a.coef_, b.coef_)
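The wrapper above relies on scikit-learn's incremental API. A minimal sketch of what partial_fit looks like on the plain Perceptron, feeding the data in chunks (illustrative addition; the chunking scheme is made up):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import Perceptron

X, y = make_classification(n_samples=1000, random_state=0)

clf = Perceptron(max_iter=1000, tol=1e-3)
for X_chunk, y_chunk in zip(np.array_split(X, 10), np.array_split(y, 10)):
    # classes must be declared up front, since no single chunk is
    # guaranteed to contain every label
    clf.partial_fit(X_chunk, y_chunk, classes=[0, 1])
print(clf.score(X, y))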
def __init__(self, isTrain, isOutlierRemoval=0):
    super(ClassificationPLA, self).__init__(isTrain, isOutlierRemoval)

    # data preprocessing
    self.dataPreprocessing()

    # PLA object
    self.clf = Perceptron()
def estimate(X_train, y_train, X_test, y_test):
    clf = Perceptron(random_state=241)
    clf.fit(X_train, y_train)
    prediction = clf.predict(X_test)
    return accuracy_score(y_test, prediction)
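A hypothetical call site for estimate(), assuming it lives in a module that already imports Perceptron and accuracy_score as above:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(*load_iris(return_X_y=True),
                                                    random_state=241)
print(estimate(X_train, y_train, X_test, y_test))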
def _init_model(self):
    return Perceptron()
def __init__(self, genres, data, type='knn', name='', clf_kwargs=None):
    self.logger = get_logger('classifier')
    self.display_name = name

    self.genres = genres
    self.m_genres = {genre: i for i, genre in enumerate(genres)}
    self.randstate = np.random.RandomState()
    self.scaler = StandardScaler()

    clf_kwargs = {} if not clf_kwargs else clf_kwargs
    if type in ['svm', 'mlp']:
        clf_kwargs['random_state'] = self.randstate

    if type == 'knn':
        self.proto_clf = KNeighborsClassifier(**clf_kwargs)
    elif type == 'svm':
        self.proto_clf = SVC(**clf_kwargs)
    elif type == 'dtree':
        self.proto_clf = DecisionTreeClassifier(**clf_kwargs)
    elif type == 'gnb':
        self.proto_clf = GaussianNB(**clf_kwargs)
    elif type == 'perc':
        self.proto_clf = Perceptron(**clf_kwargs)
    elif type == 'mlp':
        self.proto_clf = MLPClassifier(**clf_kwargs)
    elif type == 'ada':
        self.proto_clf = AdaBoostClassifier(**clf_kwargs)
    else:
        raise LookupError('Classifier type "{}" is invalid'.format(type))

    self._convert_data(data)

    self.logger.info('Classifier: {} (params={})'.format(
        self.proto_clf.__class__.__name__,
        clf_kwargs
    ))
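The 'perc' branch above forwards clf_kwargs straight to the Perceptron constructor. A sketch of keyword arguments that scikit-learn's Perceptron accepts (the values here are illustrative, not recommendations):

from sklearn.linear_model import Perceptron

proto_clf = Perceptron(penalty='l2', alpha=1e-4, shuffle=True)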
def test_base_zero_n_estimators():
    # Check that instantiating a BaseEnsemble with n_estimators<=0 raises
    # a ValueError.
    ensemble = BaggingClassifier(base_estimator=Perceptron(), n_estimators=0)
    iris = load_iris()
    assert_raise_message(ValueError,
                         "n_estimators must be greater than zero, got 0.",
                         ensemble.fit, iris.data, iris.target)
def test_perceptron_accuracy():
    for data in (X, X_csr):
        clf = Perceptron(n_iter=30, shuffle=False)
        clf.fit(data, y)
        score = clf.score(data, y)
        assert_true(score >= 0.7)
def test_undefined_methods():
    clf = Perceptron()
    for meth in ("predict_proba", "predict_log_proba"):
        assert_raises(AttributeError, lambda x: getattr(clf, x), meth)
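Perceptron deliberately exposes no probability API, which is what this test pins down. When a confidence score is needed, decision_function provides the raw margins instead; a minimal sketch (illustrative addition):

from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron

X, y = load_iris(return_X_y=True)
clf = Perceptron(max_iter=1000, tol=1e-3).fit(X, y)

print(clf.decision_function(X[:3]))  # signed distances to each class hyperplane
print(clf.predict(X[:3]))            # argmax of those margins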
def test_gridsearch_pipeline():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)
    kpca = KernelPCA(kernel="rbf", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca), ("Perceptron", Perceptron())])
    param_grid = dict(kernel_pca__gamma=2. ** np.arange(-2, 2))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    grid_search.fit(X, y)
    assert_equal(grid_search.best_score_, 1)
def test_gridsearch_pipeline_precomputed():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model using a precomputed kernel.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)
    kpca = KernelPCA(kernel="precomputed", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca), ("Perceptron", Perceptron())])
    param_grid = dict(Perceptron__n_iter=np.arange(1, 5))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    X_kernel = rbf_kernel(X, gamma=2.)
    grid_search.fit(X_kernel, y)
    assert_equal(grid_search.best_score_, 1)
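With kernel="precomputed", KernelPCA consumes an (n_samples, n_samples) Gram matrix rather than raw features, which is why the pipeline above is fitted on X_kernel. A standalone sketch of that contract (illustrative addition):

from sklearn.datasets import make_circles
from sklearn.decomposition import KernelPCA
from sklearn.metrics.pairwise import rbf_kernel

X, y = make_circles(n_samples=400, factor=.3, noise=.05, random_state=0)

K = rbf_kernel(X, gamma=2.)  # (400, 400) Gram matrix
kpca = KernelPCA(kernel="precomputed", n_components=2)
X_kpca = kpca.fit_transform(K)  # same embedding as kernel="rbf" with gamma=2.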
# ClassificationUniformBlending.py, from project AirTicketPredicting by junlulocky
def __init__(self, isTrain, isOutlierRemoval=0):
    super(ClassificationUniformBlending, self).__init__(isTrain, isOutlierRemoval)
    # data preprocessing
    self.dataPreprocessing()

    # create logistic regression object
    self.logreg = linear_model.LogisticRegression(tol=1e-6, penalty='l1',
                                                  C=0.0010985411419875584)

    # create adaboost object
    self.dt_stump = DecisionTreeClassifier(max_depth=10)
    self.ada = AdaBoostClassifier(
        base_estimator=self.dt_stump,
        learning_rate=1,
        n_estimators=5,
        algorithm="SAMME.R")

    # create knn object
    self.knn = neighbors.KNeighborsClassifier(2, weights='uniform')

    # create decision tree object
    self.decisiontree = DecisionTreeClassifier(max_depth=45, max_features='log2')

    # create neural network object
    self.net1 = NeuralNet(
        layers=[  # three layers: one hidden layer
            ('input', layers.InputLayer),
            ('hidden', layers.DenseLayer),
            # ('hidden2', layers.DenseLayer),
            ('output', layers.DenseLayer),
        ],
        # layer parameters:
        input_shape=(None, 12),  # input dimension is 12
        hidden_num_units=6,  # number of units in hidden layer
        # hidden2_num_units=3,  # number of units in second hidden layer
        output_nonlinearity=lasagne.nonlinearities.sigmoid,  # output layer uses sigmoid function
        output_num_units=1,  # output dimension is 1

        # optimization method:
        update=nesterov_momentum,
        update_learning_rate=0.002,
        update_momentum=0.9,

        regression=True,  # flag to indicate we're dealing with a regression problem
        max_epochs=25,  # we want to train this many epochs
        verbose=0,
    )

    # create PLA object
    self.pla = Perceptron()

    # create random forest object
    self.rf = RandomForestClassifier(max_features='log2', n_estimators=20, max_depth=30)
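The class name suggests the members above are combined by uniform blending, i.e. an unweighted majority vote; the voting code itself is not part of this excerpt, so the following is a hypothetical sketch of that combination rule:

import numpy as np

def uniform_blend(predictions):
    # Unweighted majority vote over equal-length 0/1 prediction arrays.
    votes = np.mean(np.vstack(predictions), axis=0)
    return (votes >= 0.5).astype(int)

print(uniform_blend([np.array([1, 0, 1]),
                     np.array([1, 1, 0]),
                     np.array([0, 0, 1])]))  # -> [1 0 1]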
def test_base_estimator():
    # Check base_estimator and its default values.
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    ensemble = BaggingClassifier(None,
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)
    assert_true(isinstance(ensemble.base_estimator_, DecisionTreeClassifier))

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)
    assert_true(isinstance(ensemble.base_estimator_, DecisionTreeClassifier))

    ensemble = BaggingClassifier(Perceptron(),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)
    assert_true(isinstance(ensemble.base_estimator_, Perceptron))

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(None,
                                n_jobs=3,
                                random_state=0).fit(X_train, y_train)
    assert_true(isinstance(ensemble.base_estimator_, DecisionTreeRegressor))

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=3,
                                random_state=0).fit(X_train, y_train)
    assert_true(isinstance(ensemble.base_estimator_, DecisionTreeRegressor))

    ensemble = BaggingRegressor(SVR(),
                                n_jobs=3,
                                random_state=0).fit(X_train, y_train)
    assert_true(isinstance(ensemble.base_estimator_, SVR))
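A quick standalone check of the classification default exercised above; base_estimator_ is the fitted-attribute name in the scikit-learn versions these tests target (illustrative sketch):

from sklearn.datasets import load_iris
from sklearn.ensemble import BaggingClassifier

iris = load_iris()
ensemble = BaggingClassifier(random_state=0).fit(iris.data, iris.target)
print(type(ensemble.base_estimator_).__name__)  # DecisionTreeClassifier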