def fit(self, X, y):
"""Fit linear model.
Parameters
----------
X : numpy array of shape [n_samples, n_features]
Training data
y : numpy array of shape [n_samples]
Target values
Returns
-------
self : returns an instance of self.
"""
random_state = check_random_state(self.random_state)
X, y = check_X_y(X, y, y_numeric=True)
n_samples, n_features = X.shape
n_subsamples, self.n_subpopulation_ = self._check_subparams(n_samples,
n_features)
self.breakdown_ = _breakdown_point(n_samples, n_subsamples)
if self.verbose:
print("Breakdown point: {0}".format(self.breakdown_))
print("Number of samples: {0}".format(n_samples))
tol_outliers = int(self.breakdown_ * n_samples)
print("Tolerable outliers: {0}".format(tol_outliers))
print("Number of subpopulations: {0}".format(
self.n_subpopulation_))
# Determine indices of subpopulation
if np.rint(binom(n_samples, n_subsamples)) <= self.max_subpopulation:
indices = list(combinations(range(n_samples), n_subsamples))
else:
indices = [choice(n_samples,
size=n_subsamples,
replace=False,
random_state=random_state)
for _ in range(self.n_subpopulation_)]
n_jobs = _get_n_jobs(self.n_jobs)
index_list = np.array_split(indices, n_jobs)
weights = Parallel(n_jobs=n_jobs,
verbose=self.verbose)(
delayed(_lstsq)(X, y, index_list[job], self.fit_intercept)
for job in range(n_jobs))
weights = np.vstack(weights)
self.n_iter_, coefs = _spatial_median(weights,
max_iter=self.max_iter,
tol=self.tol)
if self.fit_intercept:
self.intercept_ = coefs[0]
self.coef_ = coefs[1:]
else:
self.intercept_ = 0.
self.coef_ = coefs
return self
评论列表
文章目录