def trainWord2Vector(sentence_count, vector_dimension, train_count):
    lines, model_out, vector_out = "com/com/test1/test1sources/splited_words.txt", \
                                   "com/com/test1/test1sources/word2vec.model", \
                                   "com/com/test1/test1sources/word2vec.vector"
    logging.info("start training the word2vec model")
    sentences = LineSentence(lines)
    # With min_count=3, words occurring fewer than 3 times would be dropped
    # and never written to word2vec.vector; min_count=0 keeps every word.
    # workers sets the number of parallel training threads (default 3);
    # here it is set to the machine's CPU count.
    model = Word2Vec(sentences, sg=1, size=vector_dimension, window=8,
                     min_count=0, workers=multiprocessing.cpu_count())
    # Run several extra training passes to refine the vectors.
    for i in range(train_count):
        model.train(sentences=sentences, total_examples=sentence_count, epochs=model.iter)
    # trim unneeded model memory = use (much) less RAM
    # model.init_sims(replace=True)
    model.save(model_out)
    model.wv.save_word2vec_format(vector_out)
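A minimal usage sketch, assuming the imports the function relies on and a pre-4.0 gensim (where Word2Vec still accepts size= and exposes model.iter); the counts below are illustrative:

import logging
import multiprocessing

from gensim.models import Word2Vec
from gensim.models.word2vec import LineSentence

logging.basicConfig(level=logging.INFO)
# sentence_count should match the number of lines in splited_words.txt.
trainWord2Vector(sentence_count=10000, vector_dimension=100, train_count=1)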
def uptime(self):
    with open('/proc/uptime', 'r') as f:
        uptime, idletime = f.readline().split()
    up_seconds = int(float(uptime))
    idle_seconds = int(float(idletime))
    # On some machines (e.g. a Linode VPS) the idle time can be bigger than
    # the uptime, because /proc/uptime sums idle time across all cores.
    if idle_seconds > up_seconds:
        cpu_count = multiprocessing.cpu_count()
        idle_seconds = idle_seconds // cpu_count
        # On some VPSes this value may still exceed the uptime, perhaps
        # because the dom0 host has more cores than the guest reports;
        # approximate by dividing further.
        if idle_seconds > up_seconds:
            for n in range(2, 10):
                if idle_seconds // n < up_seconds:
                    idle_seconds = idle_seconds // n
                    break
    fmt = '{days} days {hours} hours {minutes} minutes {seconds} seconds'
    uptime_string = strfdelta(datetime.timedelta(seconds=up_seconds), fmt)
    idletime_string = strfdelta(datetime.timedelta(seconds=idle_seconds), fmt)
    return {
        'up': uptime_string,
        'idle': idletime_string,
        'idle_rate': div_percent(idle_seconds, up_seconds),
    }
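The method leans on two helpers that are not shown in the snippet; a minimal sketch of what they plausibly look like (names taken from the calls above, bodies assumed):

import datetime

def strfdelta(tdelta, fmt):
    # Render a timedelta through named fields like '{days} days {hours} hours'.
    d = {'days': tdelta.days}
    d['hours'], rem = divmod(tdelta.seconds, 3600)
    d['minutes'], d['seconds'] = divmod(rem, 60)
    return fmt.format(**d)

def div_percent(a, b):
    # Percentage of a over b, guarding against division by zero.
    return '%.1f%%' % (100.0 * a / b) if b else '0%'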
def main():
    input_dir, output_dir = getDirs()
    table_list = listFiles(input_dir)
    concurrency = cpu_count()
    print('Using {0:d} Processes'.format(concurrency))
    pool = Pool(concurrency)
    # Perform the passed-in write action (function) for each CSV row.
    # Each task gets its own deep copy of time_capture so the worker
    # processes do not share mutable state.
    time_capture = TimeCapture(time.time())
    results = pool.map(
        multiprocess,
        zip(repeat(output_dir),
            [copy.deepcopy(time_capture) for i in range(len(table_list))],
            table_list,
            repeat(write)))
    time_capture.end(1)
    pool.close()
    pool.join()
    print('Finished Successfully!')
    displayResults(results, time_capture.total_time)
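pool.map hands each worker a single argument, so multiprocess presumably unpacks the zipped 4-tuple; a hypothetical sketch (process_table is a stand-in name, not from the original):

def multiprocess(task):
    # Unpack the tuple built by zip() in main().
    output_dir, time_capture, table, write_action = task
    return process_table(output_dir, time_capture, table, write_action)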
def __init__(self, max_workers=None):
"""Initializes a new ThreadPoolExecutor instance.
Args:
max_workers: The maximum number of threads that can be used to
execute the given calls.
"""
if max_workers is None:
# Use this number because ThreadPoolExecutor is often
# used to overlap I/O instead of CPU work.
max_workers = (cpu_count() or 1) * 5
if max_workers <= 0:
raise ValueError("max_workers must be greater than 0")
self._max_workers = max_workers
self._work_queue = queue.Queue()
self._threads = set()
self._shutdown = False
self._shutdown_lock = threading.Lock()
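The (cpu_count() or 1) * 5 default oversubscribes threads because the pool is expected to spend most of its time blocked on I/O rather than using the CPU; the stdlib concurrent.futures.ThreadPoolExecutor, which this class appears to vendor, used the same default. An illustrative use with blocking network calls:

from concurrent.futures import ThreadPoolExecutor
import urllib.request

def fetch(url):
    # Blocking I/O: the thread mostly waits on the network, so running
    # more threads than cores still pays off.
    with urllib.request.urlopen(url) as resp:
        return resp.read()

with ThreadPoolExecutor() as executor:
    pages = list(executor.map(fetch, ['https://example.com']))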
def _execute_models(self, gmnn_queue=[], is_gm11=False):
    if self._is_empty(gmnn_queue):
        return []
    pool = mp.Pool()
    cpu_count = self.cpu_count
    length = len(gmnn_queue)
    block_count = int(math.ceil(length / float(cpu_count)))
    start_index = 0
    end_length = cpu_count
    for block in range(0, block_count):
        for gm_model in gmnn_queue[start_index:end_length]:
            # Pass the bound method itself; calling it here would run the
            # work in this process and hand apply_async only the result.
            if not is_gm11:
                pool.apply_async(gm_model.analyze)
            else:
                pool.apply_async(gm_model.forecast)
        start_index += cpu_count
        end_length += cpu_count
        if end_length > length:
            end_length = length
    self._close_pool(pool)
    return gmnn_queue
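_close_pool is not shown; with apply_async the pool has to be closed and joined before the queued work is guaranteed to have run. A plausible minimal version:

def _close_pool(self, pool):
    # Stop accepting new tasks, then block until the queued tasks finish.
    pool.close()
    pool.join()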
def get_cpus(self):
    """
    Get the CPU model name and the number of logical CPUs.
    """
    try:
        pipe = os.popen("cat /proc/cpuinfo |" + "grep 'model name'")
        data = pipe.read().strip().split(':')[-1]
        pipe.close()
        if not data:
            # Some ARM kernels expose the model under 'Processor' instead.
            pipe = os.popen("cat /proc/cpuinfo |" + "grep 'Processor'")
            data = pipe.read().strip().split(':')[-1]
            pipe.close()
        cpus = multiprocessing.cpu_count()
        data = "{CPUS} x {CPU_TYPE}".format(CPUS=cpus, CPU_TYPE=data)
    except Exception as err:
        print(err)
        data = str(err)
    return data
def configure(self, n=1, pool_size=None, episode_limit=None):
    self.n = n
    self.envs = [self.spec.make() for _ in range(self.n)]
    if pool_size is None:
        # Leave one core free for the parent process, but use at least one.
        pool_size = min(len(self.envs), multiprocessing.cpu_count() - 1)
        pool_size = max(1, pool_size)
    self.worker_n = []
    # Ceiling division: each worker handles at most m of the n environments.
    m = int((self.n + pool_size - 1) / pool_size)
    for i in range(0, self.n, m):
        envs = self.envs[i:i+m]
        self.worker_n.append(Worker(envs, i))
    if episode_limit is not None:
        self._episode_id.episode_limit = episode_limit
def save_emblems_field(
        self, emblem_with_field_list, field_name, index=True):
    total_len = len(emblem_with_field_list)
    self.logger.info('Saving field [%s], total=%d', field_name, total_len)
    workers = (multiprocessing.cpu_count() or 1)
    # Guard the chunk size so it is never 0 when there are fewer
    # emblems than workers.
    emblem_freq_chunks = MapReduceDriver.chunks(
        emblem_with_field_list, max(1, int(total_len / workers)))
    if index:
        self.data_source.create_index(
            self.COLLECTION_EMBLEM, 'name', unique=True)
        self.data_source.create_index(
            self.COLLECTION_EMBLEM, field_name)
        field = emblem_with_field_list[0][1]
        if isinstance(field, dict):
            for key in field.keys():
                self.data_source.create_index(
                    self.COLLECTION_EMBLEM, field_name + '.' + key)
    with multiprocessing.Pool(processes=workers) as pool:
        pool.starmap(
            self._save_emblems_field,
            zip(emblem_freq_chunks, repeat(field_name)))
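MapReduceDriver.chunks is not part of the snippet; a typical implementation splits the list into fixed-size slices, roughly:

@staticmethod
def chunks(items, size):
    # Yield successive slices of at most `size` elements.
    for i in range(0, len(items), size):
        yield items[i:i + size]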
def command_line(self):
cmd = csb.apps.ArgHandler(self.program, __doc__)
cpu = multiprocessing.cpu_count()
cmd.add_scalar_option('database', 'd', str, 'PDBS25 database directory (containing PDBS25cs.scs)', required=True)
cmd.add_scalar_option('shifts', 's', str, 'assigned chemical shifts table (NMR STAR file fragment)', required=True)
cmd.add_scalar_option('window', 'w', int, 'sliding window size', default=8)
cmd.add_scalar_option('top', 't', int, 'maximum number per starting position', default=25)
cmd.add_scalar_option('cpu', 'c', int, 'maximum degree of parallelism', default=cpu)
cmd.add_scalar_option('verbosity', 'v', int, 'verbosity level', default=1)
cmd.add_scalar_option('output', 'o', str, 'output directory', default='.')
cmd.add_boolean_option('filtered-map', 'f', 'make an additional filtered fragment map of centroids', default=False)
cmd.add_positional_argument('QUERY', str, 'query sequence (FASTA file)')
return cmd
def command_line(self):
cpu = multiprocessing.cpu_count()
cmd = csb.apps.ArgHandler(self.program, __doc__)
cmd.add_scalar_option('pdb', 'p', str, 'the PDB database (a directory containing all PDB files)', required=True)
cmd.add_scalar_option('native', 'n', str, 'native structure of the target (PDB file)', required=True)
cmd.add_scalar_option('chain', 'c', str, 'chain identifier (if not specified, the first chain)', default=None)
cmd.add_scalar_option('top', 't', int, 'read top N fragments per position', default=25)
cmd.add_scalar_option('cpu', 'C', int, 'maximum degree of parallelism', default=cpu)
cmd.add_scalar_option('rmsd', 'r', float, 'RMSD cutoff for precision and coverage', default=1.5)
cmd.add_scalar_option('output', 'o', str, 'output directory', default='.')
cmd.add_boolean_option('save-structures', 's', 'create a PDB file for each fragment, superimposed over the native', default=False)
cmd.add_positional_argument('library', str, 'Fragment library file in Rosetta NNmake format')
return cmd
def runmany(self, contexts, workers=mp.cpu_count(), cpu=1):
    # Note: the default for `workers` is evaluated once, at definition time.
    if workers > len(contexts):
        workers = len(contexts)
    results = []
    taskargs = [(self.program, self.db, cpu, c) for c in contexts]
    pool = mp.Pool(workers)
    try:
        for c in pool.map(_task, taskargs):
            results.append(c)
    except KeyboardInterrupt:
        # Return whatever completed before the interrupt.
        pass
    finally:
        pool.terminate()
    return results
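_task must live at module level so multiprocessing can pickle it; a hypothetical sketch of the shape implied by taskargs (the body is assumed, not from the original):

def _task(args):
    # Top-level function: multiprocessing.Pool pickles it by name.
    program, db, cpu, context = args
    return run_context(program, db, cpu, context)  # stand-in for the real work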
def __init__(self, callbacks, args, **kwargs):
"""Constructor.
:param callbacks: Callbacks for registered action handlers.
:type callbacks: dict
:param args: CLI arguments.
:type args: dict
:param error_callback: Callback to use when errors occur.
:type error_callback: function
:param source_file: Full path to component source file.
:type source_file: str
"""
self.__args = args
self.__socket = None
self.__schema_registry = get_schema_registry()
self._pool = ThreadPool(cpu_count() * 5)
self.callbacks = callbacks
self.error_callback = kwargs.get('error_callback')
self.source_file = kwargs.get('source_file')
self.context = None
self.poller = None
def test(self):
    '''
    Test execution with the necessary args
    '''
    test_dir = self.params.get('dir', default='.')
    nprocs = self.params.get('nprocs', default=None)
    seconds = self.params.get('seconds', default=60)
    args = self.params.get('args', default='')
    if not nprocs:
        nprocs = multiprocessing.cpu_count()
    loadfile = os.path.join(self.sourcedir, 'client.txt')
    cmd = '%s/dbench %s %s -D %s -c %s -t %d' % (self.sourcedir, nprocs,
                                                 args, test_dir, loadfile,
                                                 seconds)
    # Run the benchmark once and keep its output for parsing.
    self.results = process.system_output(cmd)
    pattern = re.compile(r"Throughput (.*?) MB/sec (.*?) procs")
    (throughput, procs) = pattern.findall(self.results)[0]
    self.whiteboard = json.dumps({'throughput': throughput,
                                  'procs': procs})
def __init__(self, job_queue, initializer=None, auth_generator=None,
num_processes=None, session=requests.Session):
if num_processes is None:
num_processes = multiprocessing.cpu_count() or 1
if num_processes < 1:
raise ValueError("Number of processes should at least be 1.")
self._job_queue = job_queue
self._response_queue = queue.Queue()
self._exc_queue = queue.Queue()
self._processes = num_processes
self._initializer = initializer or _identity
self._auth = auth_generator or _identity
self._session = session
self._pool = [
thread.SessionThread(self._new_session(), self._job_queue,
self._response_queue, self._exc_queue)
for _ in range(self._processes)
]
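_identity, used as the default initializer and auth generator, is presumably a do-nothing passthrough along these lines:

def _identity(session):
    # Default initializer/auth generator: return the session unchanged.
    return session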
def cpuinfo(self):
    models = []
    bitss = []
    cpuids = []
    with open('/proc/cpuinfo', 'r') as f:
        for line in f:
            if 'model name' in line or 'physical id' in line or 'flags' in line:
                item, value = line.strip().split(':')
                item = item.strip()
                value = value.strip()
                if item == 'model name':
                    models.append(re.sub(r'\s+', ' ', value))
                elif item == 'physical id':
                    cpuids.append(value)
                elif item == 'flags':
                    # The 'lm' (long mode) flag marks a 64-bit capable core.
                    if ' lm ' in value:
                        bitss.append('64bit')
                    else:
                        bitss.append('32bit')
    cores = [{'model': x, 'bits': y} for x, y in zip(models, bitss)]
    cpu_count = len(set(cpuids))
    if cpu_count == 0:
        cpu_count = 1
    return {
        'cores': cores,
        'cpu_count': cpu_count,
        'core_count': len(cores),
    }
def main():
processes = []
for i in range(int(cpu_count())):
p = Process(target=worker)
processes.append(p)
p.start()
for p in processes:
p.join()
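worker is not defined in the snippet; any module-level callable works as the Process target, for example:

import os
from multiprocessing import Process, cpu_count

def worker():
    # Placeholder body; the real task is whatever each process should do.
    print('worker running in pid %d' % os.getpid())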
def get_nonnuma_affinity_ctx(affinity_ctx):
    # test should run but affinity will be ignored
    import multiprocessing
    maxcpus = multiprocessing.cpu_count()
    maxnodes = 1
    all_cpus = '0-' + str(maxcpus - 1)
    all_cpus_sans0 = '0-' + str(maxcpus - 1)
    if maxcpus == 2:
        all_cpus_sans0 = '0-1'
    elif maxcpus == 1:
        all_cpus = '0'
        all_cpus_sans0 = ''
    numa_layout = [all_cpus]
    affinity_match = {"all": all_cpus,
                      "sock0": all_cpus,
                      "sock1": all_cpus,
                      "sock0sans0": all_cpus_sans0,
                      "sock1sans0": all_cpus_sans0,
                      "5": all_cpus,
                      "8-10": all_cpus}
    affinity_ctx['maxcpus'] = maxcpus
    affinity_ctx['maxnodes'] = maxnodes
    affinity_ctx['all_cpus'] = all_cpus
    affinity_ctx['all_cpus_sans0'] = all_cpus_sans0
    affinity_ctx['numa_layout'] = numa_layout
    affinity_ctx['affinity_match'] = affinity_match
    affinity_ctx['affinity_dev_match'] = copy.copy(affinity_match)
def download_all_chapters_p(self):
    '''
    Download all chapters in parallel.
    '''
    # Pool size clamped between 4 and 8 workers.
    mp = Pool(min(8, max(cpu_count(), 4)))
    for key in self.chapters.keys():
        mp.apply_async(self.download_chapter, (key, False))
    mp.close()
    mp.join()
def __init__(self, n_jobs: int=-1, parallel_backend: str='joblib') -> None:
    """
    Construct OptimalK to use n_jobs (multiprocessing using joblib or
    multiprocessing) or a single core.
    :param n_jobs - int: Number of CPU cores to use. Use all cores if n_jobs == -1
    """
    self.parallel_backend = parallel_backend if parallel_backend in ['joblib', 'multiprocessing'] else None
    self.n_jobs = n_jobs if 1 <= n_jobs <= cpu_count() else cpu_count()  # type: int
    # Fall back to a single core when no valid parallel backend was chosen.
    self.n_jobs = 1 if self.parallel_backend is None else self.n_jobs
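An illustrative construction, exercising only what this snippet defines (the fallback path relies on the self.parallel_backend check above):

ok = OptimalK(n_jobs=4, parallel_backend='joblib')    # 4 workers via joblib
ok1 = OptimalK(parallel_backend='not-a-backend')      # falls back to 1 core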