def post(self):
    data = tornado.escape.json_decode(self.request.body)
    is_debug = data["debug"]
    query = data["query"]
    message = {"posts": []}

    if is_debug:
        from elephant_sense.debug import search_posts_dummy
        posts = search_posts_dummy(query, count=30)
        posts = self.scoring(posts)
        message["posts"] = [self.trim(p) for p in posts]
        self.write(message)
    else:
        posts = search_posts(query, n=50)  # limit for performance. need improvements for feature extraction.
        # Split the posts into `process` contiguous batches and score each batch in parallel.
        process = 4
        batch_size = len(posts) / process
        tasks = [(int(i * batch_size), int(i * batch_size + batch_size)) for i in range(process)]
        dones = Parallel(n_jobs=process)(delayed(parallel_scoring)(self.evaluator, posts[t[0]:t[1]])
                                         for t in tasks)
        posts = []
        for scoreds in dones:
            posts += [self.trim(s) for s in scoreds]
        posts = sorted(posts, key=lambda p: p["score"], reverse=True)
        message["posts"] = posts
        self.write(message)
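The else branch above carves the result list into `process` contiguous index ranges before handing them to Parallel. A standalone sketch of just that slicing arithmetic, with made-up sizes:

n_items, n_workers = 50, 4
batch_size = n_items / n_workers  # 12.5; the int() casts below keep the slices contiguous
tasks = [(int(i * batch_size), int(i * batch_size + batch_size)) for i in range(n_workers)]
print(tasks)  # [(0, 12), (12, 25), (25, 37), (37, 50)] -- every item covered exactly once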
Example source code for Python's delayed()
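All of the snippets collected here follow the same joblib idiom: wrap the worker function with delayed() and feed the resulting generator to a Parallel instance. A minimal, self-contained sketch of that idiom (this example is ours, not taken from any of the projects below):

from math import sqrt
from joblib import Parallel, delayed

# delayed(sqrt)(i) packages the function and its arguments into a task;
# Parallel dispatches the tasks to n_jobs workers and returns the results in order.
results = Parallel(n_jobs=2)(delayed(sqrt)(i) for i in range(10))
print(results)  # [0.0, 1.0, 1.414..., ..., 3.0]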
def run(n_seeds, n_jobs, _run, _seed):
    seed_list = check_random_state(_seed).randint(np.iinfo(np.uint32).max,
                                                  size=n_seeds)
    exps = []
    exps += [{'method': 'sgd',
              'step_size': step_size}
             for step_size in np.logspace(-3, 3, 7)]
    exps += [{'method': 'gram',
              'reduction': reduction}
             for reduction in [1, 4, 6, 8, 12, 24]]
    rundir = join(basedir, str(_run._id), 'run')
    if not os.path.exists(rundir):
        os.makedirs(rundir)

    Parallel(n_jobs=n_jobs,
             verbose=10)(delayed(single_run)(config_updates, rundir, i)
                         for i, config_updates in enumerate(exps))
def run(n_seeds, n_jobs, _run, _seed):
    seed_list = check_random_state(_seed).randint(np.iinfo(np.uint32).max,
                                                  size=n_seeds)
    exps = []
    exps += [{'method': 'sgd',
              'step_size': step_size}
             for step_size in np.logspace(-7, -7, 1)]
    exps += [{'method': 'gram',
              'reduction': reduction}
             for reduction in [12]]
    rundir = join(basedir, str(_run._id), 'run')
    if not os.path.exists(rundir):
        os.makedirs(rundir)

    Parallel(n_jobs=n_jobs,
             verbose=10)(delayed(single_run)(config_updates, rundir, i)
                         for i, config_updates in enumerate(exps))
def _compute_features(self, raw_documents):
    values = array.array(str("f"))
    print("Preloading regexes")
    dummy_processor = event_classifier.StringProcessor('')
    for name, rule in named_rules_list:
        dummy_processor.count_tokens(rule)

    print("Computing Features")
    result = Parallel(
        n_jobs=7 if process_all else 1, verbose=10
    )(delayed(process_doc)(fb_event) for event_id, fb_event in raw_documents)
    for row_values in result:
        values.extend(row_values)

    X = np.array(values)
    X.shape = (len(raw_documents), len(self.features))
    return X
def _distarray_missing(self, xc, xd, cdiffs):
    """Distance array for data with missing values"""
    cindices = []
    dindices = []
    for i in range(self._datalen):
        cindices.append(np.where(np.isnan(xc[i]))[0])
        dindices.append(np.where(np.isnan(xd[i]))[0])

    if self.n_jobs != 1:
        dist_array = Parallel(n_jobs=self.n_jobs)(delayed(get_row_missing)(xc, xd, cdiffs, index, cindices, dindices)
                                                  for index in range(self._datalen))
    else:
        dist_array = [get_row_missing(xc, xd, cdiffs, index, cindices, dindices)
                      for index in range(self._datalen)]
    return np.array(dist_array)
#==================================================================#
############################# ReliefF ############################################
def _run_algorithm(self):
    sm = cnt = 0
    for i in range(self._datalen):
        sm += sum(self._distance_array[i])
        cnt += len(self._distance_array[i])
    avg_dist = sm / float(cnt)

    attr = self._get_attribute_info()
    nan_entries = np.isnan(self._X)
    NNlist = [self._find_neighbors(datalen, avg_dist) for datalen in range(self._datalen)]
    NN_near_list = [i[0] for i in NNlist]
    NN_far_list = [i[1] for i in NNlist]

    if self.n_jobs != 1:
        scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
            SURFstar_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
                                     NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std)
            for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)), axis=0)
    else:
        scores = np.sum([SURFstar_compute_scores(instance_num, attr, nan_entries, self._num_attributes,
                                                 NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std)
                         for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)], axis=0)
    return np.array(scores)
def _run_algorithm(self):
    attr = self._get_attribute_info()
    nan_entries = np.isnan(self._X)
    NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)]
    NN_near_list = [i[0] for i in NNlist]
    NN_far_list = [i[1] for i in NNlist]

    if self.n_jobs != 1:
        scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
            MultiSURFstar_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
                                          NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std)
            for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)), axis=0)
    else:
        scores = np.sum([MultiSURFstar_compute_scores(instance_num, attr, nan_entries, self._num_attributes,
                                                      NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std)
                         for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)], axis=0)
    return np.array(scores)
def _generateFragments(self):
    voc = set(self.vocabulary)
    fpsdict = dict([(idx, {}) for idx in self.moldata.index])
    nrows = self.moldata.shape[0]
    counter = 0
    with Parallel(n_jobs=self.n_jobs, verbose=self.verbose) as parallel:
        while counter < nrows:
            nextChunk = min(counter + (self.n_jobs * self.chunksize), nrows)
            result = parallel(delayed(_generateMolFrags)(mollist, voc,
                                                         self.fragmentMethod,
                                                         self.fragIdx)
                              for mollist in self._produceDataChunks(counter, nextChunk, self.chunksize))
            for r in result:
                counter += len(r)
                fpsdict.update(r)

    self.moldata['fps'] = np.array(sorted(fpsdict.items()))[:, 1]

# construct the molecule-fragment matrix as input for the LDA algorithm
def fit_transform(self, X, y=None, **fit_params):
    """
    Fits the transformers using ``X`` (and possibly ``y``), transforms
    ``X`` using the transformers, then uses :func:`pandas.concat`
    to horizontally concatenate the results.

    Returns:
        The horizontally concatenated transformer outputs.
    """
    verify_x_type(X)
    verify_y_type(y)

    Xts = joblib.Parallel(n_jobs=self.n_jobs)(
        joblib.delayed(_fit_transform)(trans, weight, X, y, **fit_params)
        for _, trans, weight in self._iter())
    return self.__concat(Xts)
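The horizontal concatenation mentioned in the docstring is an axis=1 pandas.concat. A toy illustration (the frames here are made up, and __concat's exact implementation is not part of this snippet):

import pandas as pd

a = pd.DataFrame({'x': [1, 2]})
b = pd.DataFrame({'y': [3, 4]})
print(pd.concat([a, b], axis=1))  # columns x and y side by side, rows aligned by index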
def calc_fitness(self, X, labels, fit_choice, sel):
    """Computes the fitness of each program output yhat in X.
    X: outputs of the programs (one yhat per program).
    labels: correct outputs.
    fit_choice: choice of fitness function.
    sel: selection method.
    """
    if 'lexicase' in sel:
        # return list(map(lambda yhat: self.f_vec[fit_choice](labels,yhat),X))
        return np.asarray(
            [self.proper(self.f_vec[fit_choice](labels, yhat)) for yhat in X],
            order='F')
        # return list(Parallel(n_jobs=-1)(delayed(self.f_vec[fit_choice])(labels,yhat) for yhat in X))
    else:
        # return list(map(lambda yhat: self.f[fit_choice](labels,yhat),X))
        return np.asarray([self.f[fit_choice](labels, yhat) for yhat in X],
                          order='F').reshape(-1)
        # return list(Parallel(n_jobs=-1)(delayed(self.f[fit_choice])(labels,yhat) for yhat in X))
classification.py — project: decoding-brain-challenge-2016, author: alexandrebarachant
def fit(self, X, y, sample_weight=None):
    """Fit (estimates) the centroids.

    Parameters
    ----------
    X : ndarray, shape (n_trials, n_channels, n_channels)
        ndarray of SPD matrices.
    y : ndarray, shape (n_trials, 1)
        labels corresponding to each trial.
    sample_weight : None | ndarray, shape (n_trials, 1)
        the weights of each sample. If None, each sample is treated with
        equal weight.

    Returns
    -------
    self : MDM instance
        The MDM instance.
    """
    self.classes_ = numpy.unique(y)
    self.covmeans_ = []

    if sample_weight is None:
        sample_weight = numpy.ones(X.shape[0])

    if self.n_jobs == 1:
        for l in self.classes_:
            self.covmeans_.append(
                mean_covariance(X[y == l], metric=self.metric_mean,
                                sample_weight=sample_weight[y == l]))
    else:
        self.covmeans_ = Parallel(n_jobs=self.n_jobs)(
            delayed(mean_covariance)(X[y == l], metric=self.metric_mean,
                                     sample_weight=sample_weight[y == l])
            for l in self.classes_)

    return self
classification.py (continued) — project: decoding-brain-challenge-2016, author: alexandrebarachant
def _predict_distances(self, covtest):
    """Helper to predict the distance. Equivalent to transform."""
    Nc = len(self.covmeans_)

    if self.n_jobs == 1:
        dist = [distance(covtest, self.covmeans_[m], self.metric_dist)
                for m in range(Nc)]
    else:
        dist = Parallel(n_jobs=self.n_jobs)(delayed(distance)(
            covtest, self.covmeans_[m], self.metric_dist)
            for m in range(Nc))

    dist = numpy.concatenate(dist, axis=1)
    return dist
def fit(self, epochs):
    self.picks = _handle_picks(info=epochs.info, picks=self.picks)
    _check_data(epochs, picks=self.picks,
                ch_constraint='single_channel_type', verbose=self.verbose)
    self.ch_type = _get_channel_type(epochs, self.picks)
    n_epochs = len(epochs)
    self.ch_subsets_ = self._get_random_subsets(epochs.info)
    self.mappings_ = self._get_mappings(epochs)

    n_jobs = check_n_jobs(self.n_jobs)
    parallel = Parallel(n_jobs, verbose=10)
    my_iterator = delayed(_iterate_epochs)
    if self.verbose is not False and self.n_jobs > 1:
        print('Iterating epochs ...')
    verbose = False if self.n_jobs > 1 else self.verbose
    corrs = parallel(my_iterator(self, epochs, idxs, verbose)
                     for idxs in np.array_split(np.arange(n_epochs),
                                                n_jobs))
    self.corr_ = np.concatenate(corrs)
    if self.verbose is not False and self.n_jobs > 1:
        print('[Done]')

    # count in how many windows each sensor is RANSAC-bad
    self.bad_log = np.zeros_like(self.corr_)
    self.bad_log[self.corr_ < self.min_corr] = 1
    bad_log = self.bad_log.sum(axis=0)

    bad_idx = np.where(bad_log > self.unbroken_time * n_epochs)[0]
    if len(bad_idx) > 0:
        self.bad_chs_ = [
            epochs.info['ch_names'][self.picks[p]] for p in bad_idx]
    else:
        self.bad_chs_ = []
    return self
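Epoch indices are distributed across workers with np.array_split, which accepts splits that do not divide evenly. A quick illustration with made-up numbers:

import numpy as np

chunks = np.array_split(np.arange(10), 4)
print([c.tolist() for c in chunks])  # [[0, 1, 2], [3, 4, 5], [6, 7], [8, 9]]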
def cross_val_score(estimator, X, y=None, groups=None, scoring=None, cv=None,
                    n_jobs=1, verbose=0, fit_params=None,
                    pre_dispatch='2*n_jobs'):
    """
    Evaluate one or more scores by cross-validation.

    Unlike sklearn's cross_val_score, ``scoring`` may be a list or tuple of
    scorers, and every scorer is evaluated on every fold.
    """
    if not isinstance(scoring, (list, tuple)):
        scoring = [scoring]
    X, y, groups = indexable(X, y, groups)
    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    splits = list(cv.split(X, y, groups))
    scorer = [check_scoring(estimator, scoring=s) for s in scoring]
    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                        pre_dispatch=pre_dispatch)
    scores = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorer,
                                              train, test, verbose, None,
                                              fit_params)
                      for train, test in splits)

    group_order = []
    if hasattr(cv, 'groups'):
        group_order = [np.array(cv.groups)[test].tolist()[0] for _, test in splits]
    return np.squeeze(np.array(scores)), group_order
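A hedged usage sketch for the variant above, with a scikit-learn estimator and synthetic data. The estimator, data, and scorer names are illustrative, and it assumes the surrounding module's _fit_and_score helper accepts the list of scorers built above:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=200, n_features=10, random_state=0)  # toy data
scores, group_order = cross_val_score(LogisticRegression(), X, y,
                                      scoring=['accuracy', 'roc_auc'], cv=5)
print(scores.shape)  # roughly (n_folds, n_scorers) after np.squeeze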
def permutation_test_score(estimator, X, y, groups=None, cv=None,
                           n_permutations=100, n_jobs=1, random_state=0,
                           verbose=0, scoring=None):
    """
    Evaluate the significance of a cross-validated score with permutations,
    as in test 1 of [Ojala2010]_.

    A modification of sklearn's original permutation_test_score: it returns the
    raw permutation scores so that the p-value can be computed outside this
    function and the scores can be reused.

    .. [Ojala2010] Ojala and Garriga. Permutation Tests for Studying Classifier
       Performance. The Journal of Machine Learning Research (2010) vol. 11.
    """
    X, y, groups = indexable(X, y, groups)
    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer = check_scoring(estimator, scoring=scoring)
    random_state = check_random_state(random_state)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(_permutation_test_score)(
            clone(estimator), X, _shuffle(y, groups, random_state),
            groups, cv, scorer)
        for _ in range(n_permutations))
    permutation_scores = np.array(permutation_scores)
    return permutation_scores
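Because the function above returns only the raw permutation scores, the p-value is left to the caller. A minimal sketch of that step, following the standard counting formula used by scikit-learn; true_score stands for the unpermuted cross-validated score, obtained separately:

import numpy as np

def permutation_p_value(true_score, permutation_scores, n_permutations):
    # Fraction of permutations scoring at least as well as the real labels,
    # with the +1 correction so the p-value is never exactly zero.
    return (np.sum(permutation_scores >= true_score) + 1.0) / (n_permutations + 1)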
def _pairwise_wmd(self, X_test, X_train=None):
    """Computes the word mover's distance between all train and test points.

    Parallelized over rows of X_test.

    Assumes that train and test samples are sparse BOW vectors summing to 1.

    Parameters
    ----------
    X_test: scipy.sparse matrix, shape: (n_test_samples, vocab_size)
        Test samples.
    X_train: scipy.sparse matrix, shape: (n_train_samples, vocab_size)
        Training samples. If `None`, uses the samples the estimator was fit with.

    Returns
    -------
    dist : array, shape: (n_test_samples, n_train_samples)
        Distances between all test samples and all train samples.
    """
    n_samples_test = X_test.shape[0]

    if X_train is None:
        X_train = self._fit_X

    if self.n_jobs == 1:
        dist = [self._wmd_row(test_sample, X_train)
                for test_sample in X_test]
    else:
        dist = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
            delayed(self._wmd_row)(test_sample, X_train)
            for test_sample in X_test)

    return np.array(dist)
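The docstring above requires sparse bag-of-words rows that sum to 1. One way to satisfy that assumption, sketched with made-up documents and standard scikit-learn utilities rather than anything from the original project:

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import normalize

docs = ["the cat sat on the mat", "the dog barked"]  # hypothetical documents
X_bow = CountVectorizer().fit_transform(docs)        # sparse term counts
X_bow = normalize(X_bow, norm='l1', axis=1)          # each row now sums to 1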
def _run_algorithm(self):
    attr = self._get_attribute_info()
    nan_entries = np.isnan(self._X)
    NNlist = map(self._find_neighbors, range(self._datalen))
    scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
        ReliefF_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
                                NN, self._headers, self._class_type, self._X, self._y, self._labels_std)
        for instance_num, NN in zip(range(self._datalen), NNlist)), axis=0)
    return np.array(scores)
def _run_algorithm(self):
    attr = self._get_attribute_info()
    nan_entries = np.isnan(self._X)
    NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)]
    scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
        MultiSURF_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
                                  NN_near, self._headers, self._class_type, self._X, self._y, self._labels_std)
        for instance_num, NN_near in zip(range(self._datalen), NNlist)), axis=0)
    return np.array(scores)
def transform(self, X, *args, **kwargs):
    """
    Transforms ``X`` using the transformers, uses :func:`pandas.concat`
    to horizontally concatenate the results.
    """
    verify_x_type(X)

    Xts = joblib.Parallel(n_jobs=self.n_jobs)(
        joblib.delayed(_transform)(trans, weight, X, *args, **kwargs)
        for _, trans, weight in self._iter())
    return self.__concat(Xts)
def transform(self, x, inds=None, labels=None):
    """return a transformation of x using population outputs"""
    if inds:
        # return np.asarray(Parallel(n_jobs=10)(delayed(self.out)(I,x,labels,self.otype)
        #                                       for I in inds)).transpose()
        return np.asarray(
            [self.out(I, x, labels, self.otype) for I in inds]).transpose()
    elif self._best_inds:
        # return np.asarray(Parallel(n_jobs=10)(delayed(self.out)(I,x,labels,self.otype)
        #                                       for I in self._best_inds)).transpose()
        return np.asarray(
            [self.out(I, x, labels, self.otype) for I in self._best_inds]).transpose()
    else:
        return x
def get_mi_vector(MI_FS, F, s):
    """
    Calculates the Mutual Information between each feature in F and s.

    This function is for when |S| > 1. s is the previously selected feature.
    We exploit the fact that this step is embarrassingly parallel.
    """
    MIs = Parallel(n_jobs=MI_FS.n_jobs)(delayed(_get_mi)(f, s, MI_FS)
                                        for f in F)
    return MIs
def get_first_mi_vector(MI_FS, k):
    """
    Calculates the Mutual Information between each feature in X and y.

    This function is for when |S| = 0, i.e. when we select the first feature in S.
    """
    n, p = MI_FS.X.shape
    MIs = Parallel(n_jobs=MI_FS.n_jobs)(delayed(_get_first_mi)(i, k, MI_FS)
                                        for i in range(p))
    return MIs
def transform_imgs(self, imgs_list, confounds=None, copy=True, n_jobs=1,
                   mmap_mode=None):
    """Prepare multi-subject data in parallel.

    Parameters
    ----------
    imgs_list: list of Niimg-like objects
        See http://nilearn.github.io/manipulating_images/input_output.html.
        List of imgs files to prepare. One item per subject.
    confounds: list of confounds, optional
        List of confounds (2D arrays or filenames pointing to CSV
        files). Must be of the same length as imgs_list.
    copy: boolean, optional
        If True, guarantees that the output array has no memory in common
        with the input array.
    n_jobs: integer, optional
        The number of CPUs to use to do the computation. -1 means
        'all CPUs'.

    Returns
    -------
    region_signals: list of 2D numpy.ndarray
        List of signals for each element per subject.
        shape: list of (number of scans, number of elements)
    """
    self._check_fitted()
    raw = True
    # Check whether all imgs from imgs_list are numpy instances, or fall back
    # to MultiNiftiMasker (could handle a hybrid imgs_list but we do not
    # need it for the moment)
    for imgs in imgs_list:
        if isinstance(imgs, str):
            name, ext = os.path.splitext(imgs)
            if ext != '.npy':
                raw = False
                break
        elif not isinstance(imgs, np.ndarray):
            raw = False
            break
    if raw:
        data = Parallel(n_jobs=n_jobs)(delayed(np.load)(imgs,
                                                        mmap_mode=mmap_mode)
                                       for imgs in imgs_list)
        return data
    else:
        return MultiNiftiMasker.transform_imgs(self, imgs_list,
                                               confounds=confounds,
                                               copy=copy,
                                               n_jobs=n_jobs)
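The fast path above only fires when every entry of imgs_list is either an ndarray or a path to a pre-saved .npy file, which np.load can then memory-map instead of re-masking through MultiNiftiMasker. A small sketch of preparing such an input (the file name and shape are illustrative):

import numpy as np

signals = np.random.rand(100, 200).astype('float32')  # hypothetical (n_scans, n_elements)
np.save('subject_01.npy', signals)
# mmap_mode='r' keeps the array on disk until it is actually read.
loaded = np.load('subject_01.npy', mmap_mode='r')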
modifiedGridSearchCV.py — project: CerebralCortex-2.0-legacy, author: MD2Korg
def fit(self, X, y):
    """Actual fitting, performing the search over parameters."""
    parameter_iterable = ParameterGrid(self.param_grid)
    estimator = self.estimator
    cv = self.cv

    n_samples = _num_samples(X)
    X, y = indexable(X, y)

    if y is not None:
        if len(y) != n_samples:
            raise ValueError('Target variable (y) has a different number '
                             'of samples (%i) than data (X: %i samples)'
                             % (len(y), n_samples))
    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

    if self.verbose > 0:
        if isinstance(parameter_iterable, Sized):
            n_candidates = len(parameter_iterable)
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(len(cv), n_candidates,
                                     n_candidates * len(cv)))

    base_estimator = clone(self.estimator)
    pre_dispatch = self.pre_dispatch

    out = Parallel(
        n_jobs=self.n_jobs, verbose=self.verbose,
        pre_dispatch=pre_dispatch
    )(delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                parameters, cv=cv)
      for parameters in parameter_iterable)

    best = sorted(out, key=lambda x: x[0])[-1]

    self.best_params_ = best[1]
    self.best_score_ = best[0]

    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(**best[1])
        if y is not None:
            best_estimator.fit(X, y, **self.fit_params)
        else:
            best_estimator.fit(X, **self.fit_params)
        self.best_estimator_ = best_estimator
    return self
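The parameter_iterable above is a plain sklearn ParameterGrid, so each parallel task receives one fully expanded parameter dict. A short sketch of what that expansion looks like (the grid values are illustrative):

from sklearn.model_selection import ParameterGrid

grid = ParameterGrid({'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']})
for params in grid:
    print(params)  # e.g. {'C': 0.1, 'kernel': 'linear'}, ... 6 combinations in total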