def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
X, y, scorer, parameter_iterable, fit_params,
error_score, cv, **kwargs):
groups = kwargs.pop('groups')
# test_score, n_samples, parameters
out = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
delayed(_fit_and_score)(
clone(base_estimator), X, y, scorer,
train, test, verbose, parameters,
fit_params=fit_params,
return_train_score=False,
return_n_test_samples=True,
return_times=False,
return_parameters=True,
error_score=error_score)
for parameters in parameter_iterable
for train, test in cv.split(X, y, groups))
# test_score, n_samples, _, parameters
return [(mod[0], mod[1], None, mod[2]) for mod in out]
Python usage examples of joblib's Parallel()
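Every snippet collected on this page uses the same joblib idiom: wrap the worker callable with delayed(...) and hand a generator of those calls to a Parallel(...) instance, which runs them across workers and returns the results in input order. A minimal, self-contained sketch of that pattern (the square worker is purely illustrative and not taken from any snippet below):

from joblib import Parallel, delayed

def square(x):
    # trivial worker used only to illustrate the delayed/Parallel pattern
    return x * x

# dispatch the ten calls across two worker processes; results keep input order
results = Parallel(n_jobs=2)(delayed(square)(i) for i in range(10))
print(results)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]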
def post(self):
data = tornado.escape.json_decode(self.request.body)
is_debug = data["debug"]
query = data["query"]
message = {"posts": []}
if is_debug:
from elephant_sense.debug import search_posts_dummy
posts = search_posts_dummy(query, count=30)
posts = self.scoring(posts)
message["posts"] = [self.trim(p) for p in posts]
self.write(message)
else:
        posts = search_posts(query, n=50)  # limited to 50 for performance; feature extraction needs improvement before raising this
process = 4
batch_size = len(posts) / process
tasks = [(int(i * batch_size), int(i * batch_size + batch_size)) for i in range(process)]
dones = Parallel(n_jobs=process)(delayed(parallel_scoring)(self.evaluator, posts[t[0]:t[1]]) for t in tasks)
posts = []
for scoreds in dones:
posts += [self.trim(s) for s in scoreds]
posts = sorted(posts, key=lambda p: p["score"], reverse=True)
message["posts"] = posts
self.write(message)
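The index arithmetic in the else branch splits the post list into one contiguous slice per worker before the parallel_scoring fan-out. A hedged sketch of the same chunking idea in isolation (make_batches is an illustrative helper name, not part of the handler); pinning the final end index to len(items) guards against float rounding when the length is not divisible by the number of jobs:

def make_batches(items, n_jobs):
    # one (start, end) index pair per worker, covering the whole list
    batch_size = len(items) / n_jobs
    ranges = [(int(i * batch_size), int((i + 1) * batch_size))
              for i in range(n_jobs)]
    ranges[-1] = (ranges[-1][0], len(items))  # avoid float rounding at the tail
    return ranges

# make_batches(list(range(50)), 4) -> [(0, 12), (12, 25), (25, 37), (37, 50)]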
def _compute_features(self, raw_documents):
values = array.array(str("f"))
print "Preloading regexes"
dummy_processor = event_classifier.StringProcessor('')
for name, rule in named_rules_list:
dummy_processor.count_tokens(rule)
print "Computing Features"
result = Parallel(
n_jobs=7 if process_all else 1, verbose=10
)(delayed(process_doc)(fb_event) for event_id, fb_event in raw_documents)
for row_values in result:
values.extend(row_values)
X = np.array(values)
X.shape = (len(raw_documents), len(self.features))
return X
def _extract_and_write(self, X, neighbor_id_lists, distances_to_neighbors, fileName = "l2r_train", y = None):
labels_in_neighborhood = Parallel(n_jobs=self.n_jobs)(
delayed(_create_training_samples)(cur_doc, neighbor_list, X, y, cur_doc + 1, distances_to_neighbors,
self.count_concepts, self.count_terms, self.number_of_concepts,
self.ibm1 if self.n_jobs == 1 and self.translation_probability else None) for cur_doc, neighbor_list in enumerate(neighbor_id_lists))
doc_to_neighborhood_dict = self._merge_dicts(labels_in_neighborhood)
filenames = ["samples_" + str(qid + 1) + ".tmp" for qid in range(len(doc_to_neighborhood_dict))]
with open(fileName, 'w') as outfile:
for fname in filenames:
with open(fname) as infile:
for line in infile:
outfile.write(line)
outfile.write('\n')
return doc_to_neighborhood_dict
def _distarray_missing(self, xc, xd, cdiffs):
"""Distance array for data with missing values"""
cindices = []
dindices = []
for i in range(self._datalen):
cindices.append(np.where(np.isnan(xc[i]))[0])
dindices.append(np.where(np.isnan(xd[i]))[0])
if self.n_jobs != 1:
dist_array = Parallel(n_jobs=self.n_jobs)(delayed(get_row_missing)(xc, xd, cdiffs, index, cindices, dindices) for index in range(self._datalen))
else:
dist_array = [get_row_missing(xc, xd, cdiffs, index, cindices, dindices) for index in range(self._datalen)]
return np.array(dist_array)
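The per-row bookkeeping above relies on np.where(np.isnan(row))[0], which returns the column indices of the missing entries in a row; a minimal illustration:

import numpy as np

row = np.array([1.0, np.nan, 3.0, np.nan])
print(np.where(np.isnan(row))[0])  # [1 3]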
#==================================================================#
############################# ReliefF ############################################
def _run_algorithm(self):
sm = cnt = 0
for i in range(self._datalen):
sm += sum(self._distance_array[i])
cnt += len(self._distance_array[i])
avg_dist = sm / float(cnt)
attr = self._get_attribute_info()
nan_entries = np.isnan(self._X)
NNlist = [self._find_neighbors(datalen, avg_dist) for datalen in range(self._datalen)]
NN_near_list = [i[0] for i in NNlist]
NN_far_list = [i[1] for i in NNlist]
if self.n_jobs != 1:
scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
SURFstar_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std)
for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)), axis=0)
else:
scores = np.sum([SURFstar_compute_scores(instance_num, attr, nan_entries, self._num_attributes,
NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std)
for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)], axis=0)
return np.array(scores)
def _run_algorithm(self):
attr = self._get_attribute_info()
nan_entries = np.isnan(self._X)
NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)]
NN_near_list = [i[0] for i in NNlist]
NN_far_list = [i[1] for i in NNlist]
if self.n_jobs != 1:
scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
MultiSURFstar_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std)
for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)), axis=0)
else:
scores = np.sum([MultiSURFstar_compute_scores(instance_num, attr, nan_entries, self._num_attributes,
NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std)
for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)], axis=0)
return np.array(scores)
def _generateFragments(self):
voc=set(self.vocabulary)
fpsdict = dict([(idx,{}) for idx in self.moldata.index])
nrows = self.moldata.shape[0]
counter = 0
with Parallel(n_jobs=self.n_jobs,verbose=self.verbose) as parallel:
while counter < nrows:
nextChunk = min(counter+(self.n_jobs*self.chunksize),nrows)
result = parallel(delayed(_generateMolFrags)(mollist, voc,
self.fragmentMethod,
self.fragIdx)
for mollist in self._produceDataChunks(counter,nextChunk,self.chunksize))
for r in result:
counter+=len(r)
fpsdict.update(r)
self.moldata['fps'] = np.array(sorted(fpsdict.items()))[:,1]
# construct the molecule-fragment matrix as input for the LDA algorithm
def fit_transform(self, X, y=None, **fit_params):
"""
Fits the transformer using ``X`` (and possibly ``y``). Transforms
``X`` using the transformers, uses :func:`pandas.concat`
to horizontally concatenate the results.
    Returns:
        The horizontally concatenated result of the transformations.
"""
verify_x_type(X)
verify_y_type(y)
Xts = joblib.Parallel(n_jobs=self.n_jobs)(
joblib.delayed(_fit_transform)(trans, weight, X, y, **fit_params) for _, trans, weight in self._iter())
return self.__concat(Xts)
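Here, "horizontally concatenate" means pandas.concat with axis=1, which aligns the transformed frames on their index; a minimal illustration with throwaway frames (not part of the transformer):

import pandas as pd

a = pd.DataFrame({'x': [1, 2]})
b = pd.DataFrame({'y': [3, 4]})
print(pd.concat([a, b], axis=1))
#    x  y
# 0  1  3
# 1  2  4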
def calc_fitness(self,X,labels,fit_choice,sel):
"""computes fitness of individual output yhat.
yhat: output of a program.
labels: correct outputs
fit_choice: choice of fitness function
"""
if 'lexicase' in sel:
# return list(map(lambda yhat: self.f_vec[fit_choice](labels,yhat),X))
return np.asarray(
[self.proper(self.f_vec[fit_choice](labels,
yhat)) for yhat in X],
order='F')
# return list(Parallel(n_jobs=-1)(delayed(self.f_vec[fit_choice])(labels,yhat) for yhat in X))
else:
# return list(map(lambda yhat: self.f[fit_choice](labels,yhat),X))
return np.asarray([self.f[fit_choice](labels,yhat) for yhat in X],
order='F').reshape(-1)
# return list(Parallel(n_jobs=-1)(delayed(self.f[fit_choice])(labels,yhat) for yhat in X))
def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
X, y, scorer, parameter_iterable, fit_params,
error_score, cv, **kwargs):
# test_score, n_samples, score_time, parameters
return Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
delayed(_fit_and_score)(
clone(base_estimator), X, y, scorer,
train, test, verbose, parameters,
fit_params, return_parameters=True,
error_score=error_score)
for parameters in parameter_iterable
for train, test in cv)
def Parallel2(*args, **kwargs):
kwargs['backend'] = None
return Parallel(*args, **kwargs)
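Parallel2 simply forces backend=None, which leaves joblib on its default backend. The backend keyword can also be set explicitly; a hedged sketch (math.sqrt is just an illustrative worker):

import math
from joblib import Parallel, delayed

# 'threading' avoids process start-up cost when the work releases the GIL;
# backend=None, as in Parallel2 above, falls back to joblib's default backend
roots = Parallel(n_jobs=2, backend='threading')(
    delayed(math.sqrt)(i) for i in range(4))
print(roots)  # [0.0, 1.0, 1.4142135623730951, 1.7320508075688772]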
Source file: classification.py (project: decoding-brain-challenge-2016, author: alexandrebarachant)
def fit(self, X, y, sample_weight=None):
"""Fit (estimates) the centroids.
Parameters
----------
X : ndarray, shape (n_trials, n_channels, n_channels)
ndarray of SPD matrices.
y : ndarray shape (n_trials, 1)
labels corresponding to each trial.
sample_weight : None | ndarray shape (n_trials, 1)
the weights of each sample. if None, each sample is treated with
equal weights.
Returns
-------
self : MDM instance
The MDM instance.
"""
self.classes_ = numpy.unique(y)
self.covmeans_ = []
if sample_weight is None:
sample_weight = numpy.ones(X.shape[0])
if self.n_jobs == 1:
for l in self.classes_:
self.covmeans_.append(
mean_covariance(X[y == l], metric=self.metric_mean,
sample_weight=sample_weight[y == l]))
else:
self.covmeans_ = Parallel(n_jobs=self.n_jobs)(
delayed(mean_covariance)(X[y == l], metric=self.metric_mean,
sample_weight=sample_weight[y == l])
for l in self.classes_)
return self
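The else branch is a common per-class fan-out: one delayed call per class label, with results collected in label order. A hedged sketch of the same shape with a generic reducer (numpy.mean stands in for mean_covariance, and the data is purely illustrative):

import numpy
from joblib import Parallel, delayed

X = numpy.random.randn(6, 4)
y = numpy.array([0, 0, 1, 1, 2, 2])
# one mean per class, computed in parallel and returned in class order
class_means = Parallel(n_jobs=2)(
    delayed(numpy.mean)(X[y == c], 0) for c in numpy.unique(y))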
Source file: classification.py (project: decoding-brain-challenge-2016, author: alexandrebarachant)
def _predict_distances(self, covtest):
"""Helper to predict the distance. equivalent to transform."""
Nc = len(self.covmeans_)
if self.n_jobs == 1:
dist = [distance(covtest, self.covmeans_[m], self.metric_dist)
for m in range(Nc)]
else:
dist = Parallel(n_jobs=self.n_jobs)(delayed(distance)(
covtest, self.covmeans_[m], self.metric_dist)
for m in range(Nc))
dist = numpy.concatenate(dist, axis=1)
return dist
def fit(self, epochs):
self.picks = _handle_picks(info=epochs.info, picks=self.picks)
_check_data(epochs, picks=self.picks,
ch_constraint='single_channel_type', verbose=self.verbose)
self.ch_type = _get_channel_type(epochs, self.picks)
n_epochs = len(epochs)
self.ch_subsets_ = self._get_random_subsets(epochs.info)
self.mappings_ = self._get_mappings(epochs)
n_jobs = check_n_jobs(self.n_jobs)
parallel = Parallel(n_jobs, verbose=10)
my_iterator = delayed(_iterate_epochs)
if self.verbose is not False and self.n_jobs > 1:
print('Iterating epochs ...')
verbose = False if self.n_jobs > 1 else self.verbose
corrs = parallel(my_iterator(self, epochs, idxs, verbose)
for idxs in np.array_split(np.arange(n_epochs),
n_jobs))
self.corr_ = np.concatenate(corrs)
if self.verbose is not False and self.n_jobs > 1:
print('[Done]')
    # count, for each sensor, in how many windows it was flagged as RANSAC-bad
self.bad_log = np.zeros_like(self.corr_)
self.bad_log[self.corr_ < self.min_corr] = 1
bad_log = self.bad_log.sum(axis=0)
bad_idx = np.where(bad_log > self.unbroken_time * n_epochs)[0]
if len(bad_idx) > 0:
self.bad_chs_ = [
epochs.info['ch_names'][self.picks[p]] for p in bad_idx]
else:
self.bad_chs_ = []
return self
def _cpu_map(fun, param_grid, n_jobs, verbose=True):
return Parallel(
n_jobs=n_jobs,
verbose=verbose,
backend='threading', # any sklearn backend should work here
)(
delayed(fun)(
params
)
for params in param_grid)
def _cpu_map(fun, param_grid, n_jobs, verbose):
return Parallel(
n_jobs=n_jobs,
verbose=verbose,
backend='threading', # any sklearn backend should work here
)(
delayed(fun)(
params
)
for params in param_grid)
def search_test_params(base_clf, cv_params, X, y, train, test, scoring):
parameter_iterable = ParameterGrid(cv_params)
grid_scores = Parallel(n_jobs=-1)(
delayed(_fit_and_score)(clone(base_clf), X, y, scoring,
train, test, 0, parameters,
None, return_parameters=True)
for parameters in parameter_iterable)
# grid_scores = [_fit_and_score(clone(base_clf), X, y, scoring, train, test, 0, parameters, None, return_parameters=True) for parameters in parameter_iterable]
grid_scores = sorted(grid_scores, key=lambda x: x[0], reverse=True)
scores, _, _, parameters = grid_scores[0]
return scores, parameters
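parameter_iterable here is a scikit-learn ParameterGrid, which expands a dict of value lists into every combination of settings; a minimal illustration:

from sklearn.model_selection import ParameterGrid

grid = ParameterGrid({'C': [0.1, 1.0], 'kernel': ['linear', 'rbf']})
for params in grid:
    print(params)  # four dicts, one per (C, kernel) combination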
def score(self, imgs, confounds=None):
"""
    Score the images against the learned spatial decomposition, based on
    the objective-function value that the algorithm minimizes. Lower
    means a better fit.
Parameters
----------
imgs: list of Niimg-like objects
See http://nilearn.github.io/building_blocks/manipulating_mr_images.html#niimg.
Data on which PCA must be calculated. If this is a list,
the affine is considered the same for all.
confounds: CSV file path or 2D matrix
This parameter is passed to nilearn.signal.clean. Please see the
related documentation for details
Returns
-------
score: float
Average score on all input data
"""
if (isinstance(imgs, str) or not hasattr(imgs, '__iter__')):
imgs = [imgs]
if confounds is None:
confounds = itertools.repeat(None)
scores = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
delayed(self._cache(_score_img, func_memory_level=1))(
self.coder_, self.masker_, img, these_confounds)
for img, these_confounds in zip(imgs, confounds))
scores = np.array(scores)
try:
len_imgs = np.array([check_niimg(img).get_shape()[3]
for img in imgs])
except ImageFileError:
len_imgs = np.array([np.load(img, mmap_mode='r').shape[0]
for img in imgs])
score = np.sum(scores * len_imgs) / np.sum(len_imgs)
return score
def transform(self, imgs, confounds=None):
"""Compute the mask and the ICA maps across subjects
Parameters
----------
batch_size
imgs: list of Niimg-like objects
See http://nilearn.github.io/building_blocks/manipulating_mr_images.html#niimg.
Data on which PCA must be calculated. If this is a list,
the affine is considered the same for all.
confounds: CSV file path or 2D matrix
This parameter is passed to nilearn.signal.clean. Please see the
related documentation for details
Returns
-------
codes, list of ndarray, shape = n_images * (n_samples, n_components)
Loadings for each of the images, and each of the time steps
"""
if (isinstance(imgs, str) or not hasattr(imgs, '__iter__')):
imgs = [imgs]
if confounds is None:
confounds = itertools.repeat(None)
codes = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
delayed(self._cache(_transform_img, func_memory_level=1))(
self.coder_, self.masker_, img, these_confounds)
for img, these_confounds in zip(imgs, confounds))
return codes
def cross_val_score(estimator, X, y=None, groups=None, scoring=None, cv=None,
n_jobs=1, verbose=0, fit_params=None,
pre_dispatch='2*n_jobs'):
"""
Evaluate a score by cross-validation
"""
if not isinstance(scoring, (list, tuple)):
scoring = [scoring]
X, y, groups = indexable(X, y, groups)
cv = check_cv(cv, y, classifier=is_classifier(estimator))
splits = list(cv.split(X, y, groups))
scorer = [check_scoring(estimator, scoring=s) for s in scoring]
# We clone the estimator to make sure that all the folds are
# independent, and that it is pickle-able.
parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
pre_dispatch=pre_dispatch)
scores = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorer,
train, test, verbose, None,
fit_params)
for train, test in splits)
group_order = []
if hasattr(cv, 'groups'):
group_order = [np.array(cv.groups)[test].tolist()[0] for _, test in splits]
return np.squeeze(np.array(scores)), group_order
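check_cv followed by cv.split(X, y, groups) is the splitting idiom used above; a minimal sketch of what splits contains (the data is illustrative):

import numpy as np
from sklearn.model_selection import check_cv

X = np.arange(20).reshape(10, 2)
y = np.array([0, 1] * 5)
cv = check_cv(3, y, classifier=True)  # StratifiedKFold(3), since y is discrete
splits = list(cv.split(X, y))         # list of (train_indices, test_indices) pairs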
def permutation_test_score(estimator, X, y, groups=None, cv=None,
n_permutations=100, n_jobs=1, random_state=0,
verbose=0, scoring=None):
"""
Evaluate the significance of a cross-validated score with permutations,
as in test 1 of [Ojala2010]_.
    A modified version of sklearn's permutation_test_score that returns the raw
    permutation scores and leaves the p-value computation to the caller, so the
    unpermuted score can be reused outside this function.
.. [Ojala2010] Ojala and Garriga. Permutation Tests for Studying Classifier
Performance. The Journal of Machine Learning Research (2010)
vol. 11
"""
X, y, groups = indexable(X, y, groups)
cv = check_cv(cv, y, classifier=is_classifier(estimator))
scorer = check_scoring(estimator, scoring=scoring)
random_state = check_random_state(random_state)
# We clone the estimator to make sure that all the folds are
# independent, and that it is pickle-able.
permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
delayed(_permutation_test_score)(
clone(estimator), X, _shuffle(y, groups, random_state),
groups, cv, scorer)
for _ in range(n_permutations))
permutation_scores = np.array(permutation_scores)
return permutation_scores
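Because only the raw permutation scores are returned, the p-value is computed by the caller. A hedged sketch of that follow-up step, using the +1 correction of Ojala and Garriga (2010) as in sklearn's original implementation (score is the unpermuted cross-validated score):

import numpy as np

def permutation_pvalue(score, permutation_scores):
    # fraction of permutations scoring at least as well as the real score,
    # with +1 in numerator and denominator so the p-value is never exactly zero
    n_permutations = len(permutation_scores)
    return (np.sum(permutation_scores >= score) + 1.0) / (n_permutations + 1)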
def anomaly_score(self, X=None):
"""Compute anomaly scores for test samples.
Parameters
----------
X : array-like of shape (n_samples, n_features), default None
Test samples.
Returns
-------
y_score : array-like of shape (n_samples,)
Anomaly scores for test samples.
"""
check_is_fitted(self, '_knn')
if X is None:
X = self._knn._fit_X
ind = self._knn.kneighbors(None, return_distance=False)
else:
X = check_array(X)
ind = self._knn.kneighbors(X, return_distance=False)
n_samples, _ = X.shape
try:
result = Parallel(self.n_jobs)(
delayed(_abof)(
X[s], ind[s], self._knn._fit_X
) for s in gen_even_slices(n_samples, self.n_jobs)
)
except FloatingPointError as e:
raise ValueError('X must not contain training samples') from e
return -np.concatenate(result)
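gen_even_slices (from sklearn.utils) is what splits the test rows into near-equal contiguous chunks, one per delayed call; a minimal illustration:

from sklearn.utils import gen_even_slices

print(list(gen_even_slices(10, 3)))
# [slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]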
def _pairwise_wmd(self, X_test, X_train=None):
"""Computes the word mover's distance between all train and test points.
Parallelized over rows of X_test.
Assumes that train and test samples are sparse BOW vectors summing to 1.
Parameters
----------
X_test: scipy.sparse matrix, shape: (n_test_samples, vocab_size)
Test samples.
X_train: scipy.sparse matrix, shape: (n_train_samples, vocab_size)
Training samples. If `None`, uses the samples the estimator was fit with.
Returns
-------
dist : array, shape: (n_test_samples, n_train_samples)
Distances between all test samples and all train samples.
"""
n_samples_test = X_test.shape[0]
    if X_train is None:
        X_train = self._fit_X
    if self.n_jobs == 1:
        dist = [self._wmd_row(test_sample, X_train) for test_sample in X_test]
    else:
        dist = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
            delayed(self._wmd_row)(test_sample, X_train) for test_sample in X_test)
return np.array(dist)
def _extract_features(self, topNIndices, topNDistances, y, distances):
samples = self._split_samples(topNIndices, topNDistances, y)
training_data_list = Parallel(n_jobs=self.n_jobs)(
delayed(_analyze)(tI, tD, y, distances, self.dependencies) for tI, tD, y in samples)
# merge training data
training_data = defaultdict(list)
for training_data_dict in training_data_list:
for label, training_samples_of_label in training_data_dict.items():
training_data[label].extend(training_data_dict[label])
return training_data
def fit(self, X, y):
"""Fit one regressor for each quantile.
Parameters
----------
* `X` [array-like, shape=(n_samples, n_features)]:
Training vectors, where `n_samples` is the number of samples
and `n_features` is the number of features.
* `y` [array-like, shape=(n_samples,)]:
Target values (real numbers in regression)
"""
rng = check_random_state(self.random_state)
if self.base_estimator is None:
base_estimator = GradientBoostingRegressor(loss='quantile')
else:
base_estimator = self.base_estimator
if not isinstance(base_estimator, GradientBoostingRegressor):
raise ValueError('base_estimator has to be of type'
' GradientBoostingRegressor.')
if not base_estimator.loss == 'quantile':
raise ValueError('base_estimator has to use quantile'
' loss not %s' % base_estimator.loss)
# The predictions for different quantiles should be sorted.
# Therefore each of the regressors need the same seed.
base_estimator.set_params(random_state=rng)
regressors = []
for q in self.quantiles:
regressor = clone(base_estimator)
regressor.set_params(alpha=q)
regressors.append(regressor)
self.regressors_ = Parallel(n_jobs=self.n_jobs, backend='threading')(
delayed(_parallel_fit)(regressor, X, y)
for regressor in regressors)
return self
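Each entry of regressors_ is a clone of the base GradientBoostingRegressor with loss='quantile' and alpha set to one of self.quantiles. A hedged sketch of a single such regressor in isolation (the data is synthetic and purely illustrative):

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

rng = np.random.RandomState(0)
X = rng.uniform(size=(200, 1))
y = X.ravel() + rng.normal(scale=0.1, size=200)

# alpha=0.9 makes predict() estimate the conditional 90th percentile of y
upper = GradientBoostingRegressor(loss='quantile', alpha=0.9, random_state=0)
upper.fit(X, y)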
def fit_transform(self, X, y=None, **fit_params):
self._validate_transformers()
result = Parallel(n_jobs=self.n_jobs)(
delayed(_fit_transform_one)(trans, name, weight, X, y, **fit_params)
for name, trans, weight in self._iter())
to_concats = [r[0] for r in result]
if self.drop:
return self._concat_just_right(to_concats[0], to_concats[1:])
return self._concat_just_right(X, to_concats)
def transform(self, X):
Xs = Parallel(n_jobs=self.n_jobs)(
delayed(_transform_one)(trans, name, weight, X)
for name, trans, weight in self._iter())
if self.drop:
return self._concat_just_right(Xs[0], Xs[1:])
return self._concat_just_right(X, Xs)
def fit_transform(self, X, y=None, **fit_params):
self._validate_transformers()
result = Parallel(n_jobs=self.n_jobs)(
delayed(_fit_transform_one)(trans, name, weight, X, y, **fit_params)
for name, trans, weight in self._iter())
Xs = [r[0] for r in result]
Xs.insert(0, X)
return pd.concat(Xs, axis=0)
def transform(self, X):
Xs = Parallel(n_jobs=self.n_jobs)(
delayed(_transform_one)(trans, name, weight, X)
for name, trans, weight in self._iter())
Xs.insert(0, X)
return pd.concat(Xs, axis=0)