def fit(self, X, y):
"""A reference implementation of a fitting function
Parameters
----------
X : array-like or sparse matrix of shape = [n_samples, n_features]
The training input samples.
y : array-like, shape = [n_samples] or [n_samples, n_outputs]
The target values (class labels in classification, real numbers in
regression).
Returns
-------
self : object
Returns self.
"""
X, y = check_X_y(X, y)
# Return the estimator
return self
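# Illustrative usage, not from the original source: a minimal sketch of what
# check_X_y does for the template above. It converts array-likes to validated
# numpy arrays and checks that X and y have consistent lengths.
import numpy as np
from sklearn.utils.validation import check_X_y

X_raw = [[0.0, 1.0], [2.0, 3.0], [4.0, 5.0]]
y_raw = [0, 1, 0]
X_checked, y_checked = check_X_y(X_raw, y_raw)
print(X_checked.shape, y_checked.shape)  # (3, 2) (3,)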
def fit(self, X, y):
"""A reference implementation of a fitting function for a classifier.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
The training input samples.
y : array-like, shape = [n_samples]
The target values. An array of int.
Returns
-------
self : object
Returns self.
"""
# Check that X and y have correct shape
X, y = check_X_y(X, y)
# Store the classes seen during fit
self.classes_ = unique_labels(y)
self.X_ = X
self.y_ = y
# Return the classifier
return self
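# Illustrative usage, not from the original source: unique_labels returns the
# distinct labels in sorted order, which is what the classifier template above
# stores in classes_.
from sklearn.utils.multiclass import unique_labels

print(unique_labels([2, 3, 2, 1]))  # [1 2 3]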
def _check_X_y(self, X, y):
    # helpful error message for scikit-learn < 0.17
is_2d = hasattr(y, 'shape') and len(y.shape) > 1 and y.shape[1] >= 2
if is_2d or type_of_target(y) != 'binary':
raise TypeError("Only binary targets supported. For training "
"multiclass or multilabel models, you may use the "
"OneVsRest or OneVsAll metaestimators in "
"scikit-learn.")
    X, y = check_X_y(X, y, dtype=np.double, accept_sparse='csc',
                     multi_output=False)
    self.label_binarizer_ = LabelBinarizer(pos_label=1, neg_label=-1)
    y = self.label_binarizer_.fit_transform(y).ravel().astype(np.double)
return X, y
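# Illustrative sketch, not from the original source, of the two helpers the
# method above relies on: type_of_target for the binary-only guard, and
# LabelBinarizer(pos_label=1, neg_label=-1) to map labels onto {-1, +1}.
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils.multiclass import type_of_target

print(type_of_target([0, 1, 1, 0]))  # 'binary'
print(type_of_target([0, 1, 2]))     # 'multiclass' -- would raise above

lb = LabelBinarizer(pos_label=1, neg_label=-1)
print(lb.fit_transform([0, 1, 1, 0]).ravel().astype(np.double))
# [-1.  1.  1. -1.]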
def fit(self, x_, y, sample_weight=None):
X, y = check_X_y(x_, y, accept_sparse=[], y_numeric=True, multi_output=False)
x, y, X_offset, y_offset, X_scale = self._preprocess_data(
        X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
copy=self.copy_X, sample_weight=None)
if sample_weight is not None:
x, y = _rescale_data(x, y, sample_weight)
self.iters = 0
self.ind_ = np.ones(x.shape[1], dtype=bool) # initial guess
if self.threshold > 0:
self._reduce(x, y)
else:
self.coef_ = self._regress(x[:, self.ind_], y, self.alpha)
if self.unbias and self.alpha >= 0:
self._unbias(x, y)
self._set_intercept(X_offset, y_offset, X_scale)
return self
def fit(self, x_, y, sample_weight=None):
n_samples, n_features = x_.shape
X, y = check_X_y(x_, y, accept_sparse=[], y_numeric=True, multi_output=False)
x, y, X_offset, y_offset, X_scale = self._preprocess_data(
        X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
copy=self.copy_X, sample_weight=None)
if sample_weight is not None:
# Sample weight can be implemented via a simple rescaling.
x, y = _rescale_data(x, y, sample_weight)
coefs, intercept = fit_with_noise(x, y, self.sigma, self.alpha, self.n)
self.intercept_ = intercept
self.coef_ = coefs
self._set_intercept(X_offset, y_offset, X_scale)
return self
def fit(self, X, y=None, **fit_params):
# scikit-learn checks
X, y = check_X_y(X, y)
n_terms = min(self.n_terms, X.shape[1])
# Get a list of unique labels from y
labels = np.unique(y)
# Determine the n top terms per class
self.top_terms_per_class_ = {
c: set(np.argpartition(np.sum(X[y == c], axis=0), -n_terms)[-n_terms:])
for c in labels
}
# Return the classifier
return self
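# Illustrative sketch, not from the original source, of the argpartition trick
# used above: it returns the indices of the n_terms largest values without a
# full sort; note the returned indices are in no particular order.
import numpy as np

counts = np.array([5, 1, 9, 3, 7])
n_terms = 2
top = np.argpartition(counts, -n_terms)[-n_terms:]
print(set(top))  # {2, 4} -- positions of the two largest counts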
def fit(self, X, y):
#import traceback
from fabric.api import local
X, y = check_X_y(X, y, allow_nd=True, multi_output=True,
y_numeric=True, estimator="GridSearch")
print "njobs = {}".format(self.njobs)
if self.njobs > 1:
assert False
# iterable = [(i, pg, self.estimator_cls, self.kf, X, y, \
# self.score_fns, len(self.parameter_grid)) \
# for i,pg in enumerate(self.parameter_grid)]
# try:
# p = multiprocessing.Pool(self.njobs)
# res = p.map(mp_grid_search, iterable)
        # print(res)
# except:
# traceback.print_exc()
else:
self.grid_scores = []
estimator = self.estimator_cls()
num_tasks = len(self.parameter_grid)
        for i, params in enumerate(self.parameter_grid):
            print("Starting task {}/{}...".format(i + 1, num_tasks))
with stopwatch("Done. Elapsed time"):
self.grid_scores.append(mp_grid_search((i,
params,
estimator,
self.kf,
X,
y,
self.score_fns,
len(self.parameter_grid))))
if self.checkpoint_path is not None:
local("rm -f {}*.p".format(self.checkpoint_path))
savepath = self.checkpoint_path + "_{}.p".format(i)
                with open(savepath, 'wb') as f:
pickle.dump(self.grid_scores, f)
def check_X_y(self, X, y):
from sklearn.utils.validation import check_X_y
if X.shape[0] > GPR.MAX_TRAIN_SIZE:
raise Exception("X_train size cannot exceed {} ({})"
.format(GPR.MAX_TRAIN_SIZE, X.shape[0]))
return check_X_y(X, y, multi_output=True,
allow_nd=True, y_numeric=True,
estimator="GPR")
def fit(self, X_train, y_train, ridge=1.0):
self._reset()
X_train, y_train = self.check_X_y(X_train, y_train)
self.X_train = np.float32(X_train)
self.y_train = np.float32(y_train)
sample_size = self.X_train.shape[0]
if np.isscalar(ridge):
ridge = np.ones(sample_size) * ridge
assert ridge.ndim == 1
    X_dists = np.zeros((sample_size, sample_size), dtype=np.float32)
    with tf.Session(graph=self.graph, config=tf.ConfigProto(
            intra_op_parallelism_threads=self.NUM_THREADS)) as sess:
        # Pairwise distances between training points, computed one row
        # at a time.
        dist_op = self.ops['dist_op']
        v1, v2 = self.vars['v1_h'], self.vars['v2_h']
        for i in range(sample_size):
            X_dists[i] = sess.run(dist_op, feed_dict={v1: self.X_train[i],
                                                      v2: self.X_train})
        # Kernel matrix built from the distances, with the ridge term
        # added to the diagonal.
        K_ridge_op = self.ops['K_ridge_op']
        X_dists_ph = self.vars['X_dists_h']
        ridge_ph = self.vars['ridge_h']
        self.K = sess.run(K_ridge_op, feed_dict={X_dists_ph: X_dists,
                                                 ridge_ph: ridge})
        # Invert the regularized kernel matrix.
        K_ph = self.vars['K_h']
        K_inv_op = self.ops['K_inv_op']
        self.K_inv = sess.run(K_inv_op, feed_dict={K_ph: self.K})
        # Precompute K^-1 * y for use at prediction time.
        xy_op = self.ops['xy_op']
        K_inv_ph = self.vars['K_inv_h']
        yt_ph = self.vars['yt_h']
        self.xy_ = sess.run(xy_op, feed_dict={K_inv_ph: self.K_inv,
                                              yt_ph: self.y_train})
return self
def fit(self, X, y):
"""
Fit on X.
:param X: {array-like, sparse matrix}, shape (n_samples, n_features). Input data, where `n_samples` is the
number of samples and `n_features` is the number of features.
:return: Returns self
"""
# Numpy
X = np.array(X)
y = np.array(y)
# Check that X and y have correct shape
X, y = check_X_y(X, y)
# Store the classes seen during fit
self.classes_ = unique_labels(y)
# Store so that we know what we fitted on
self.X_ = X
self.y_ = y
# Get dimensions
input_dim = X.shape[1]
output_dim = len(self.classes_)
# Create a model if needed
if (input_dim, output_dim) != self.io:
self.model = self._build(input_dim, output_dim)
self.model.fit(X, y, batch_size=self.batch_size, epochs=self.epochs, verbose=self.verbose)
# Return the classifier
return self
def _check_X_y(self, X, y):
X, y = check_X_y(X, y, accept_sparse='csc', multi_output=False,
dtype=np.double, y_numeric=True)
y = y.astype(np.double).ravel()
return X, y
def fit(self, X, y):
"""Builds a forest of trees from the training set (X, y).
Parameters
----------
    X : array-like of shape = [n_samples, n_features]
        The training input samples. Internally, its dtype will be converted
        to ``dtype=np.float32``.
    y : array-like, shape = [n_samples]
        The target values (real numbers).
Returns
-------
self : object
Returns self.
"""
X, y = check_X_y(X, y, dtype=np.float32, multi_output=False)
return super(MondrianForestRegressor, self).fit(X, y)
def fit(self, X, y):
"""Builds a forest of trees from the training set (X, y).
Parameters
----------
    X : array-like of shape = [n_samples, n_features]
        The training input samples. Internally, its dtype will be converted
        to ``dtype=np.float32``.
    y : array-like, shape = [n_samples]
        The target values (class labels).
Returns
-------
self : object
Returns self.
"""
X, y = check_X_y(X, y, dtype=np.float32, multi_output=False)
return super(MondrianForestClassifier, self).fit(X, y)
def fit(self, X, y):
    X, y = check_X_y(X, y)
    self.classes_ = unique_labels(y)
    # Only the first feature column is used (see _first_col).
    self.X_ = DynamicBayesianClassifier._first_col(X)
    self.y_ = y
    self.size_ = self.X_.size
    # Create or update the per-class DBayesMode tracker for each sample.
    for i in range(self.X_.size):
        if y[i] not in self.dbayesmode_major_:
            self.dbayesmode_major_[y[i]] = scalgoutil.DBayesMode(y[i])
        self.dbayesmode_major_[y[i]].update(self.X_[i])
    self.update_priors()
    return self
def fit(self, x, y, **kwargs):
#x, y = check_X_y(x, y, multi_output=False)
super().fit(self._transform(x, y), y, **kwargs)
self._arrange_coef()
return self
def fit(self, x, y=None):
x, y = check_X_y(x, y)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=self.random_state)
self.front = run_ffx(x_train, x_test, y_train, y_test,
self.exponents, self.operators, num_alphas=self.num_alphas, l1_ratios=self.l1_ratios,
target_score=self.target_score, n_tail=self.n_tail, random_state=self.random_state,
strategies=self.strategies, n_jobs=self.n_jobs, max_complexity=self.max_complexity,
rational=self.rational, eps=self.eps, **self.kw)
self.make_model(x_test, y_test)
return self
def fit(self, X, y):
X, y = check_X_y(X, y)
return self
def fit(self, X, y):
X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
if sp.issparse(X):
raise ValueError("Nonsensical Error")
return self
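# Illustrative sketch, not from the original source: with
# accept_sparse=['csr', 'csc'], check_X_y passes a sparse matrix through
# without densifying it, which is why the test estimator above can still see
# issparse(X) == True afterwards.
import numpy as np
import scipy.sparse as sp
from sklearn.utils.validation import check_X_y

X_sp = sp.csr_matrix(np.eye(3))
y = [0, 1, 0]
X_checked, _ = check_X_y(X_sp, y, accept_sparse=['csr', 'csc'])
print(sp.issparse(X_checked))  # True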
def fit(self, X, y):
X, y = check_X_y(X, y)
self.coef_ = np.ones(X.shape[1])
return self
def fit(self, X, y):
"""
Train the Logistic model, X and y are numpy arrays.
"""
X, y = check_X_y(X, y)
#, accept_sparse=['csr', 'csc']) # not sure how to handle sparse
self.classes_, y = np.unique(y, return_inverse=True)
if self.fit_intercept:
X = np.insert(X, 0, 1, axis=1)
w0 = np.zeros(X.shape[1])
if self.bounds is None:
self.bounds_ = [(None, None) for v in w0]
elif isinstance(self.bounds, tuple) and len(self.bounds) == 2:
self.bounds_ = [self.bounds for v in w0]
elif self.fit_intercept and len(self.bounds) == len(w0) - 1:
self.bounds_ = np.concatenate(([(None, None)], self.bounds))
else:
self.bounds_ = self.bounds
if len(self.bounds_) != len(w0):
raise ValueError("Bounds must be the same length as the coef")
if isinstance(self.l2, Number):
self.l2_ = [self.l2 for v in w0]
elif self.fit_intercept and len(self.l2) == len(w0) - 1:
self.l2_ = np.insert(self.l2, 0, 0)
else:
self.l2_ = self.l2
if len(self.l2_) != len(w0):
raise ValueError("L2 penalty must be the same length as the coef, be sure the intercept is accounted for.")
# the intercept should never be regularized.
if self.fit_intercept:
self.l2_[0] = 0.0
w = minimize(_ll, w0, args=(X, y, self.l2_),
jac=_ll_grad,
method=self.method, bounds=self.bounds_,
options={'maxiter': self.max_iter,
#'disp': True
})['x']
if self.fit_intercept:
self.intercept_ = w[0:1]
self.coef_ = w[1:]
else:
self.intercept_ = np.array([])
self.coef_ = w
return self
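# Hypothetical helpers: the original module defines its own _ll and _ll_grad,
# which the snippet above calls but does not show. A minimal sketch, assuming
# y is encoded as {0, 1} (as np.unique(..., return_inverse=True) produces for
# binary labels) and l2 is a per-coefficient penalty vector.
import numpy as np

def _ll(w, X, y, l2):
    # Penalized negative log-likelihood: sum(log(1 + exp(-s * Xw))) with
    # s in {-1, +1}, written stably via logaddexp, plus 0.5 * sum(l2 * w^2).
    l2 = np.asarray(l2, dtype=float)
    z = X.dot(w)
    s = 2.0 * y - 1.0  # map {0, 1} -> {-1, +1}
    return np.sum(np.logaddexp(0.0, -s * z)) + 0.5 * np.dot(l2, w ** 2)

def _ll_grad(w, X, y, l2):
    # Gradient of _ll: X^T (p - y) + l2 * w, where p = sigmoid(Xw).
    l2 = np.asarray(l2, dtype=float)
    p = 1.0 / (1.0 + np.exp(-X.dot(w)))
    return X.T.dot(p - y) + l2 * w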
def fit(self, X, y=None, **fit_params):
# Check that X and y have correct shape
X, y = check_X_y(X, y)
    # meta_features_ has as many rows as X and as many columns as there are
    # models. However, if use_proba is True, then
    # (n_classes - 1) * n_models columns have to be stored.
if self.use_proba:
self.n_probas_ = len(np.unique(y)) - 1
        self.meta_features_ = np.empty((len(X), len(self.models) * self.n_probas_))
else:
self.meta_features_ = np.empty((len(X), len(self.models)))
# Generate CV folds
folds = self.cv.split(X, y)
for train_index, test_index in folds:
for i, (name, model) in enumerate(self.models.items()):
# Extract fit params for the model
model_fit_params = fit_params.get(name, {})
# Train the model on the training set
model.fit(X[train_index], y[train_index], **model_fit_params)
# If use_proba is True then the probabilities of each class for
# each model have to be predicted and then stored into
# meta_features
if self.use_proba:
probabilities = model.predict_proba(X[test_index])
for j, k in enumerate(range(self.n_probas_ * i, self.n_probas_ * (i + 1))):
self.meta_features_[test_index, k] = probabilities[:, j]
else:
self.meta_features_[test_index, i] = model.predict(X[test_index])
# Combine the predictions with the original features
if self.use_base_features:
self.meta_features_ = np.hstack((self.meta_features_, X))
self.meta_model.fit(self.meta_features_, y)
# Each model has to be fit on all the data for further predictions
for model in self.models.values():
model.fit(X, y)
return self
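# Illustrative note, not from the original source, on why only n_classes - 1
# probability columns are kept per model above: class probabilities sum to 1,
# so the last column is linearly implied by the others and would add a
# perfectly collinear meta-feature.
import numpy as np

proba = np.array([[0.2, 0.5, 0.3],
                  [0.1, 0.1, 0.8]])
print(np.allclose(proba[:, -1], 1.0 - proba[:, :-1].sum(axis=1)))  # True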
def fit(self, X, y):
"""Fit Gaussian process classification model
Parameters
----------
X : array-like, shape = (n_samples, n_features)
Training data
y : array-like, shape = (n_samples,)
Target values, must be binary
Returns
-------
self : returns an instance of self.
"""
X, y = check_X_y(X, y, multi_output=False)
self.base_estimator_ = _BinaryGaussianProcessClassifierLaplace(
self.kernel, self.optimizer, self.n_restarts_optimizer,
self.max_iter_predict, self.warm_start, self.copy_X_train,
self.random_state)
self.classes_ = np.unique(y)
self.n_classes_ = self.classes_.size
if self.n_classes_ == 1:
raise ValueError("GaussianProcessClassifier requires 2 or more "
"distinct classes. Only class %s present."
% self.classes_[0])
if self.n_classes_ > 2:
if self.multi_class == "one_vs_rest":
self.base_estimator_ = \
OneVsRestClassifier(self.base_estimator_,
n_jobs=self.n_jobs)
elif self.multi_class == "one_vs_one":
self.base_estimator_ = \
OneVsOneClassifier(self.base_estimator_,
n_jobs=self.n_jobs)
else:
raise ValueError("Unknown multi-class mode %s"
% self.multi_class)
self.base_estimator_.fit(X, y)
if self.n_classes_ > 2:
self.log_marginal_likelihood_value_ = np.mean(
[estimator.log_marginal_likelihood()
for estimator in self.base_estimator_.estimators_])
else:
self.log_marginal_likelihood_value_ = \
self.base_estimator_.log_marginal_likelihood()
return self