def _run_algorithm(self):
    attr = self._get_attribute_info()
    nan_entries = np.isnan(self._X)
    # Lazily build one neighbor list per instance; zip() below consumes the map object.
    NNlist = map(self._find_neighbors, range(self._datalen))
    # Score each instance in parallel, then sum the per-instance score vectors.
    scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
        ReliefF_compute_scores)(instance_num, attr, nan_entries,
                                self._num_attributes, NN, self._headers,
                                self._class_type, self._X, self._y,
                                self._labels_std)
        for instance_num, NN in zip(range(self._datalen), NNlist)), axis=0)
    return np.array(scores)
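Both scoring loops in this file follow joblib's basic scatter-and-reduce pattern: build one delayed(...) call per work item, let Parallel execute them, then reduce the per-item results. A minimal, self-contained sketch of that pattern; score_instance is a made-up stand-in for ReliefF_compute_scores:

import numpy as np
from joblib import Parallel, delayed

def score_instance(i, X):
    # Toy per-instance scorer: one score per feature for instance i.
    return np.abs(X - X[i]).mean(axis=0)

X = np.random.rand(100, 5)
# One delayed call per instance; Parallel returns the list of results.
per_instance = Parallel(n_jobs=2)(
    delayed(score_instance)(i, X) for i in range(len(X)))
scores = np.sum(per_instance, axis=0)  # reduce to one score per feature
print(scores.shape)  # (5,)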
def _run_algorithm(self):
    attr = self._get_attribute_info()
    nan_entries = np.isnan(self._X)
    # One list of near neighbors per instance (the loop variable indexes
    # instances, not the dataset length).
    NNlist = [self._find_neighbors(instance_num)
              for instance_num in range(self._datalen)]
    scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
        MultiSURF_compute_scores)(instance_num, attr, nan_entries,
                                  self._num_attributes, NN_near,
                                  self._headers, self._class_type,
                                  self._X, self._y, self._labels_std)
        for instance_num, NN_near in zip(range(self._datalen), NNlist)), axis=0)
    return np.array(scores)
def transform(self, X, *args, **kwargs):
    """
    Transforms ``X`` using the transformers, then uses :func:`pandas.concat`
    to horizontally concatenate the results.
    """
    verify_x_type(X)

    # Run each (name, transformer, weight) triple in parallel.
    Xts = joblib.Parallel(n_jobs=self.n_jobs)(
        joblib.delayed(_transform)(trans, weight, X, *args, **kwargs)
        for _, trans, weight in self._iter())
    return self.__concat(Xts)
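A minimal sketch of the same idea outside the class, assuming plain functions in place of the fitted transformers (scale and square below are made up for illustration):

import joblib
import pandas as pd

def scale(X):
    return X * 2.0

def square(X):
    return X ** 2

X = pd.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]})
# Apply each transformer in parallel, then concatenate columns side by side.
parts = joblib.Parallel(n_jobs=2)(
    joblib.delayed(f)(X) for f in (scale, square))
result = pd.concat(parts, axis=1)
print(result.shape)  # (2, 4)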
def transform(self, x, inds=None, labels=None):
    """Return a transformation of x using population outputs."""
    if inds:
        # return np.asarray(Parallel(n_jobs=10)(delayed(self.out)(I, x, labels, self.otype)
        #                                       for I in inds)).transpose()
        return np.asarray(
            [self.out(I, x, labels, self.otype) for I in inds]).transpose()
    elif self._best_inds:
        # return np.asarray(Parallel(n_jobs=10)(delayed(self.out)(I, x, labels, self.otype)
        #                                       for I in self._best_inds)).transpose()
        return np.asarray(
            [self.out(I, x, labels, self.otype)
             for I in self._best_inds]).transpose()
    else:
        return x
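The commented-out Parallel variant above is workable whenever self.out and the individuals are picklable. A self-contained sketch of the same map-and-transpose pattern with a toy output function (out and inds here are hypothetical stand-ins):

import numpy as np
from joblib import Parallel, delayed

def out(ind, x):
    # Toy stand-in for an individual's output on every row of x.
    return x.sum(axis=1) * ind

x = np.random.rand(8, 3)
inds = [1.0, 2.0, 3.0]
# One column per individual: shape (len(x), len(inds)) after transpose.
phi = np.asarray(Parallel(n_jobs=2)(
    delayed(out)(I, x) for I in inds)).transpose()
print(phi.shape)  # (8, 3)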
def _fit_multiclass(self, X, y, alpha, C, learning_rate,
                    sample_weight, n_iter):
    """Fit a multi-class classifier by combining binary classifiers.

    Each binary classifier predicts one class versus all others. This
    strategy is called OVA: One Versus All.
    """
    # Use joblib to fit OvA in parallel.
    result = Parallel(n_jobs=self.n_jobs, backend="threading",
                      verbose=self.verbose)(
        delayed(fit_binary)(self, i, X, y, alpha, C, learning_rate,
                            n_iter, self._expanded_class_weight[i], 1.,
                            sample_weight)
        for i in range(len(self.classes_)))

    for i, (_, intercept) in enumerate(result):
        self.intercept_[i] = intercept

    self.t_ += n_iter * X.shape[0]

    if self.average > 0:
        if self.average <= self.t_ - 1.0:
            self.coef_ = self.average_coef_
            self.intercept_ = self.average_intercept_
        else:
            self.coef_ = self.standard_coef_
            self.standard_intercept_ = np.atleast_1d(self.intercept_)
            self.intercept_ = self.standard_intercept_
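The backend="threading" choice matters here, presumably because fit_binary spends most of its time in compiled code that releases the GIL, so threads avoid pickling X to worker processes. A minimal sketch of the same backend choice with a GIL-releasing NumPy workload (fit_one_class is made up for illustration):

import numpy as np
from joblib import Parallel, delayed

def fit_one_class(i, X):
    # Stand-in for a per-class binary fit; large BLAS operations release
    # the GIL, so the threading backend runs them concurrently and every
    # thread shares X without a copy.
    return i, float(np.linalg.norm(X @ X.T))

X = np.random.rand(200, 50)
results = Parallel(n_jobs=2, backend="threading")(
    delayed(fit_one_class)(i, X) for i in range(3))
for i, intercept in results:
    print(i, intercept)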
def get_mi_vector(MI_FS, F, s):
    """
    Calculates the Mutual Information between each feature in F and s.

    This function is for when |S| > 1, where s is the previously selected
    feature. We exploit the fact that this step is embarrassingly parallel.
    """
    MIs = Parallel(n_jobs=MI_FS.n_jobs)(delayed(_get_mi)(f, s, MI_FS)
                                        for f in F)
    return MIs
def get_first_mi_vector(MI_FS, k):
    """
    Calculates the Mutual Information between each feature in X and y.

    This function is for when |S| = 0, i.e. when we select the first
    feature in S.
    """
    n, p = MI_FS.X.shape
    # range() replaces the Python 2-only xrange(); one MI estimate per feature.
    MIs = Parallel(n_jobs=MI_FS.n_jobs)(delayed(_get_first_mi)(i, k, MI_FS)
                                        for i in range(p))
    return MIs
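A self-contained sketch of the same per-feature parallel map, using scikit-learn's mutual_info_regression in place of the project's private _get_mi / _get_first_mi helpers, and plain arrays in place of the MI_FS object:

import numpy as np
from joblib import Parallel, delayed
from sklearn.feature_selection import mutual_info_regression

def mi_one_feature(i, X, y):
    # Estimate MI between feature i and the target.
    return mutual_info_regression(X[:, [i]], y)[0]

rng = np.random.RandomState(0)
X = rng.rand(300, 4)
y = X[:, 0] + 0.1 * rng.randn(300)
MIs = Parallel(n_jobs=2)(
    delayed(mi_one_feature)(i, X, y) for i in range(X.shape[1]))
print(np.round(MIs, 2))  # feature 0 should dominate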
def transform_imgs(self, imgs_list, confounds=None, copy=True, n_jobs=1,
                   mmap_mode=None):
    """Prepare multi-subject data in parallel.

    Parameters
    ----------
    imgs_list: list of Niimg-like objects
        See http://nilearn.github.io/manipulating_images/input_output.html.
        List of image files to prepare, one item per subject.

    confounds: list of confounds, optional
        List of confounds (2D arrays or filenames pointing to CSV
        files). Must be of the same length as imgs_list.

    copy: boolean, optional
        If True, guarantees that the output array shares no memory with
        the input array.

    n_jobs: integer, optional
        The number of CPUs to use for the computation. -1 means
        'all CPUs'.

    mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
        Memory-mapping mode passed to numpy.load when the inputs are
        .npy files.

    Returns
    -------
    region_signals: list of 2D numpy.ndarray
        List of signals for each element per subject.
        shape: list of (number of scans, number of elements)
    """
    self._check_fitted()

    raw = True
    # Check whether all imgs from imgs_list are numpy instances, or fall
    # back to MultiNiftiMasker (could handle a hybrid imgs_list, but we do
    # not need that for the moment).
    for imgs in imgs_list:
        if isinstance(imgs, str):
            name, ext = os.path.splitext(imgs)
            if ext != '.npy':
                raw = False
                break
        elif not isinstance(imgs, np.ndarray):
            raw = False
            break
    if raw:
        # Fast path: load the .npy arrays directly, optionally memory-mapped.
        data = Parallel(n_jobs=n_jobs)(delayed(np.load)(imgs,
                                                        mmap_mode=mmap_mode)
                                       for imgs in imgs_list)
        return data
    else:
        return MultiNiftiMasker.transform_imgs(self, imgs_list,
                                               confounds=confounds,
                                               copy=copy,
                                               n_jobs=n_jobs)
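A minimal sketch of the raw fast path in isolation, loading several .npy files in parallel with memory-mapping (the file names are made up for illustration):

import numpy as np
from joblib import Parallel, delayed

# Write a few small arrays to disk so there is something to load.
paths = []
for i in range(3):
    path = 'subject_%d.npy' % i
    np.save(path, np.random.rand(10, 4))
    paths.append(path)

# Parallel np.load with mmap_mode='r' maps the files read-only instead of
# reading them fully into memory.
data = Parallel(n_jobs=2)(
    delayed(np.load)(p, mmap_mode='r') for p in paths)
print([d.shape for d in data])  # [(10, 4), (10, 4), (10, 4)]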
modifiedGridSearchCV.py (project: CerebralCortex-2.0-legacy, author: MD2Korg)
def fit(self, X, y):
    """Actual fitting, performing the search over parameters."""
    parameter_iterable = ParameterGrid(self.param_grid)

    estimator = self.estimator
    cv = self.cv

    n_samples = _num_samples(X)
    X, y = indexable(X, y)
    if y is not None:
        if len(y) != n_samples:
            raise ValueError('Target variable (y) has a different number '
                             'of samples (%i) than data (X: %i samples)'
                             % (len(y), n_samples))
    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

    if self.verbose > 0:
        if isinstance(parameter_iterable, Sized):
            n_candidates = len(parameter_iterable)
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(len(cv), n_candidates,
                                     n_candidates * len(cv)))

    base_estimator = clone(self.estimator)
    pre_dispatch = self.pre_dispatch

    # One cross-validated fit-and-score per parameter setting, in parallel.
    out = Parallel(
        n_jobs=self.n_jobs, verbose=self.verbose,
        pre_dispatch=pre_dispatch
    )(delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                parameters, cv=cv)
      for parameters in parameter_iterable)

    # Keep the (score, parameters) pair with the highest score.
    best = sorted(out, key=lambda x: x[0])[-1]
    self.best_params_ = best[1]
    self.best_score_ = best[0]

    if self.refit:
        # Fit the best estimator using the entire dataset;
        # clone first to work around broken estimators.
        best_estimator = clone(base_estimator).set_params(**best[1])
        if y is not None:
            best_estimator.fit(X, y, **self.fit_params)
        else:
            best_estimator.fit(X, **self.fit_params)
        self.best_estimator_ = best_estimator
    return self
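The same pattern can be reproduced with public scikit-learn helpers. A minimal sketch, assuming cross_val_score as the scorer in place of the project's cv_fit_and_score:

import numpy as np
from joblib import Parallel, delayed
from sklearn.base import clone
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import ParameterGrid, cross_val_score

X, y = load_iris(return_X_y=True)
base = LogisticRegression(max_iter=200)
grid = ParameterGrid({'C': [0.1, 1.0, 10.0]})

def fit_and_score(params):
    # Clone so every candidate gets a fresh, independent estimator.
    est = clone(base).set_params(**params)
    return cross_val_score(est, X, y, cv=3).mean(), params

out = Parallel(n_jobs=2)(delayed(fit_and_score)(p) for p in grid)
best_score, best_params = max(out, key=lambda t: t[0])
print(best_score, best_params)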
modifiedRandomizedSearchCV.py (project: CerebralCortex-2.0-legacy, author: MD2Korg)
def fit(self, X, y):
    """Actual fitting, performing the search over parameters."""
    parameter_iterable = ParameterSampler(self.param_distributions,
                                          self.n_iter,
                                          random_state=self.random_state)

    estimator = self.estimator
    cv = self.cv

    n_samples = _num_samples(X)
    X, y = indexable(X, y)
    if y is not None:
        if len(y) != n_samples:
            raise ValueError('Target variable (y) has a different number '
                             'of samples (%i) than data (X: %i samples)'
                             % (len(y), n_samples))
    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

    if self.verbose > 0:
        if isinstance(parameter_iterable, Sized):
            n_candidates = len(parameter_iterable)
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(len(cv), n_candidates,
                                     n_candidates * len(cv)))

    base_estimator = clone(self.estimator)
    pre_dispatch = self.pre_dispatch

    out = Parallel(
        n_jobs=self.n_jobs, verbose=self.verbose,
        pre_dispatch=pre_dispatch
    )(delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                parameters, cv=cv)
      for parameters in parameter_iterable)

    # Sort on the score alone: sorting the raw (score, params) tuples would
    # try to compare the params dicts whenever two scores tie.
    best = sorted(out, key=lambda x: x[0], reverse=True)[0]
    self.best_params_ = best[1]
    self.best_score_ = best[0]

    if self.refit:
        # Fit the best estimator using the entire dataset;
        # clone first to work around broken estimators.
        best_estimator = clone(base_estimator).set_params(**best[1])
        if y is not None:
            best_estimator.fit(X, y, **self.fit_params)
        else:
            best_estimator.fit(X, **self.fit_params)
        self.best_estimator_ = best_estimator
    return self
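The randomized variant differs from the grid version only in how candidates are drawn; the parallel loop is identical. A short sketch of ParameterSampler producing those candidates (the distribution is made up for illustration):

from scipy.stats import loguniform
from sklearn.model_selection import ParameterSampler

# Draw 5 random candidates instead of enumerating a full grid.
sampler = ParameterSampler({'C': loguniform(1e-2, 1e2)}, n_iter=5,
                           random_state=0)
for params in sampler:
    print(params)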
def run(n_calls=32, n_runs=1, save_traces=True, n_jobs=1):
    """
    Main function used to run the experiments.

    Parameters
    ----------
    * `n_calls`: int
        Evaluation budget.

    * `n_runs`: int
        Number of times to repeat the optimization in order to average
        out noise.

    * `save_traces`: bool
        Whether or not to save the data collected during optimization.

    * `n_jobs`: int
        Number of different repeats of optimization to run in parallel.
    """
    surrogate_minimizers = [gbrt_minimize, forest_minimize, gp_minimize]
    selected_models = sorted(MODELS, key=lambda x: x.__name__)
    selected_datasets = DATASETS.keys()

    # All the parameter values and objectives collected during execution
    # are stored in the nested dict below.
    all_data = {}
    for model in selected_models:
        all_data[model] = {}

        for dataset in selected_datasets:
            if not issubclass(model, DATASETS[dataset]):
                continue

            all_data[model][dataset] = {}
            for surrogate_minimizer in surrogate_minimizers:
                print(surrogate_minimizer.__name__, model.__name__, dataset)
                # One independent seed per repeat so the runs differ.
                seeds = np.random.randint(0, 2**30, n_runs)
                raw_trace = Parallel(n_jobs=n_jobs)(
                    delayed(evaluate_optimizer)(
                        surrogate_minimizer, model, dataset, n_calls, seed
                    ) for seed in seeds
                )
                all_data[model][dataset][surrogate_minimizer.__name__] = raw_trace

    # Convert the model keys to strings so that results can be saved as JSON.
    all_data = {k.__name__: v for k, v in all_data.items()}

    # Dump the recorded objective values as JSON. The original format string
    # reused %m for the minutes and the non-portable %s; %H_%M_%S is intended.
    if save_traces:
        with open(datetime.now().strftime("%m_%Y_%d_%H_%M_%S") + '.json',
                  'w') as f:
            json.dump(all_data, f)
    calculate_performance(all_data)
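Passing an explicit seed per repeat, as above, is the reliable way to randomize under joblib: depending on the backend, worker processes can otherwise start from identical global RNG state. A minimal sketch of the idiom (noisy_objective is made up for illustration):

import numpy as np
from joblib import Parallel, delayed

def noisy_objective(seed):
    # Each repeat gets its own RNG, so the parallel runs are independent
    # and reproducible from the seed list alone.
    rng = np.random.RandomState(seed)
    return rng.rand()

seeds = np.random.randint(0, 2**30, size=4)
traces = Parallel(n_jobs=2)(delayed(noisy_objective)(s) for s in seeds)
print(traces)  # four distinct values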