Python cpu_count() usage examples
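Every snippet below revolves around the same call: asking the operating system how many logical CPUs are available, then sizing worker pools accordingly. As a baseline, a minimal sketch using only the standard library (os.sched_getaffinity is Linux-only):

import multiprocessing
import os

print(multiprocessing.cpu_count())   # logical CPUs on the machine
print(os.cpu_count())                # equivalent, available since Python 3.4
# On Linux, the CPUs this process may actually use; this respects CPU
# affinity, which can be narrower than cpu_count() inside containers:
print(len(os.sched_getaffinity(0)))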

si.py (project: Starfish, author: BillWang139967)
def uptime(self):
        with open('/proc/uptime', 'r') as f:
            uptime, idletime = f.readline().split()
            up_seconds = int(float(uptime))
            idle_seconds = int(float(idletime))
            # on some machines (e.g. a Linode VPS), idle time may be bigger than uptime
            if idle_seconds > up_seconds:
                cpu_count = multiprocessing.cpu_count()
                idle_seconds = idle_seconds/cpu_count
                # on some VPSes this value may still be bigger than uptime,
                # perhaps because the dom0 host machine has more cores;
                # approximate by trying successively larger divisors
                if idle_seconds > up_seconds:
                    for n in range(2,10):
                        if idle_seconds/n < up_seconds:
                            idle_seconds = idle_seconds/n
                            break
            fmt = '{days} days {hours} hours {minutes} minutes {seconds} seconds'
            uptime_string = strfdelta(datetime.timedelta(seconds = up_seconds), fmt)
            idletime_string = strfdelta(datetime.timedelta(seconds = idle_seconds), fmt)
        return {
            'up': uptime_string,
            'idle': idletime_string,
            'idle_rate': div_percent(idle_seconds, up_seconds),
        }
comic.py (project: ComicSpider, author: QuantumLiu)
def download_chapter_m(self):
        '''
        Download all pages of the chapter using multiprocessing
        '''
        results=[]
        if not self.pages:
            print('No page')
            return None
        mp=Pool(min(8,max(cpu_count(),4)))
        for page in self.pages:
            results.append(mp.apply_async(self.download_page,(page,)))
        mp.close()
        mp.join()
        num=sum([result.get() for result in results])
        print('Downloaded {} pages'.format(num))
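The pool-size expression above clamps the worker count to the range [4, 8] no matter how many cores the machine reports. The same pattern as a tiny reusable sketch:

from multiprocessing import cpu_count

def clamped_workers(lo=4, hi=8):
    # at least lo workers, at most hi, otherwise one per CPU
    return min(hi, max(cpu_count(), lo))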
parallelizer.py (project: NeoAnalysis, author: neoanalysis)
def suggestedWorkerCount():
        if 'linux' in sys.platform:
            ## I think we can do a little better here..
            ## cpu_count does not consider that there is little extra benefit to using hyperthreaded cores.
            try:
                cores = {}
                pid = None

                for line in open('/proc/cpuinfo'):
                    m = re.match(r'physical id\s+:\s+(\d+)', line)
                    if m is not None:
                        pid = m.groups()[0]
                    m = re.match(r'cpu cores\s+:\s+(\d+)', line)
                    if m is not None:
                        cores[pid] = int(m.groups()[0])
                return sum(cores.values())
            except:
                return multiprocessing.cpu_count()

        else:
            return multiprocessing.cpu_count()
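Parsing /proc/cpuinfo like this only works on Linux. If a third-party dependency is acceptable, psutil reports the physical-core count directly; a sketch assuming psutil is installed:

import psutil

physical = psutil.cpu_count(logical=False)  # may return None on some platforms
logical = psutil.cpu_count(logical=True)
workers = physical or logical or 1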
tox_proclimit.py (project: detox, author: tox-dev)
def tox_addoption(parser):
    def positive_integer(value):
        ivalue = int(value)
        if ivalue <= 0:
            raise argparse.ArgumentTypeError(
                "%s is an invalid positive int value" % value)
        return ivalue

    try:
        num_proc = multiprocessing.cpu_count()
    except Exception:
        num_proc = 2
    parser.add_argument(
        "-n", "--num",
        type=positive_integer,
        action="store",
        default=num_proc,
        dest="numproc",
        help="set the number of concurrent processes "
             "(default %s)." % num_proc)
raw.py (project: girder_worker, author: girder)
def test_celery_task_revoke_in_queue(self, params):
        # Fill up queue
        blockers = []
        for _ in range(0, multiprocessing.cpu_count()):
            blockers.append(cancelable.delay(sleep_interval=0.1))

        result = cancelable.delay()
        result.revoke()

        assert wait_for_status(self.getCurrentUser(), result.job, JobStatus.CANCELED)

        # Now clean up the blockers
        for blocker in blockers:
            blocker.revoke()

        return result.job
traditional.py (project: girder_worker, author: girder)
def test_traditional_task_cancel_in_queue(self, params):
        # Fill up queue
        blockers = []
        for _ in range(0, multiprocessing.cpu_count()):
            blockers.append(cancelable.delay(sleep_interval=0.1))

        jobModel = self.model('job', 'jobs')
        job = jobModel.createJob(
            title='test_traditional_task_cancel',
            type='worker', handler='worker_handler',
            user=self.getCurrentUser(), public=False, args=(self.girder_worker_run_cancelable,),
            kwargs={'inputs': {},
                    'outputs': {}})

        job['kwargs']['jobInfo'] = utils.jobInfoSpec(job)

        jobModel.save(job)
        jobModel.scheduleJob(job)
        jobModel.cancelJob(job)

        # Now clean up the blockers
        for blocker in blockers:
            blocker.revoke()

        return job
scrape_wiki.py (project: GANGogh, author: rkjones4)
def for_genre(genre,num):
    pool = ThreadPool(multiprocessing.cpu_count()-1)
    nums = list(range(1,num))
    results = pool.starmap(soupit,zip(nums,itertools.repeat(genre)))
    pool.close()
    pool.join()

    #build up the list of urls with the results of all the sub-processes that succeeded in a single list
    new_results = []
    for j in results:
        if j:
            for i in j:
                new_results.append(i)

    pool = ThreadPool(multiprocessing.cpu_count()-1)
    pool.starmap(dwnld,zip(enumerate(new_results),itertools.repeat(genre)))
    pool.close()
    pool.join()
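ThreadPool lives in multiprocessing.pool and mirrors Pool's API while using threads instead of processes, which suits I/O-bound work like these downloads. A minimal usage sketch (Python 3, where pools are context managers):

from multiprocessing.pool import ThreadPool

def fetch(n):
    return n * n  # stand-in for an I/O-bound call

with ThreadPool(4) as pool:  # the pool is terminated on exit
    results = pool.map(fetch, range(10))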
classify.py (project: fingerprint-securedrop, author: freedomofpress)
def __init__(self, model_timestamp, world, model_type, 
                 hyperparameters, feature_scaling=True,
                 n_cores=multiprocessing.cpu_count(), k=10):
        """
        Args:
            model [string]: machine learning algorithm to be used
            parameters [dict]: hyperparameter set to be used for the
                               machine learning algorithm
            k [int]: number of k-folds
            world [dict]: world type (open- or closed- world)
                          and parameters if necessary
        """

        self.model_timestamp = model_timestamp
        self.hyperparameters = hyperparameters
        self.model_type = model_type
        self.world_type = world["type"]
        self.frac_obs = world["observed_fraction"]
        self.n_cores = n_cores
        self.k = k
        self.feature_scaling = feature_scaling
        self.db = database.ModelStorage()
        self.train_class_balance = 'DEFAULT'
        self.base_rate = 'DEFAULT'
__init__.py (project: ronin, author: tliron)
def __init__(self, command=None, jobs=None):
        """
        :param command: ``cargo`` command; defaults to the context's ``rust.cargo_command``
        :type command: basestring or ~types.FunctionType
        :param jobs: number of jobs; defaults to CPU count + 1
        :type jobs: int
        """

        super(CargoBuild, self).__init__()
        self.command = lambda ctx: which(ctx.fallback(command, 'rust.cargo_command',
                                                      DEFAULT_CARGO_COMMAND))
        self.add_argument('build')
        self.add_argument_unfiltered('--manifest-path', '$in')
        if jobs is None:
            jobs = cpu_count() + 1
        self.jobs(jobs)
        self.hooks.append(_cargo_output_path_hook)
        self.hooks.append(_cargo_debug_hook)
train.py (project: TextRankPlus, author: zuoxiaolei)
def run():
    '''
    Train a word2vec model on the corpus and print a few most-similar words
    '''
    reload(sys)
    sys.setdefaultencoding('utf8')
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)
    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logger.info("running %s" % ' '.join(sys.argv))

    outp1 = r'wiki_model'
    outp2 = r'vector.txt'
    # NOTE: `sentences` is assumed to be defined at module scope
    # (e.g. a gensim LineSentence over the wiki corpus)
    model = Word2Vec(sentences, size=400, window=5, min_count=5, workers=multiprocessing.cpu_count())
    model.save(outp1)
    model.wv.save_word2vec_format(outp2, binary=False)

    testData = ['??','??','??','??']
    for i in testData:
        temp = model.most_similar(i)
        for j in temp:
            print '%f %s'%(j[1],j[0])
        print ''
Task.py (project: mongodb_consistent_backup, author: Percona-Lab)
def __init__(self, task_name, manager, config, timer, base_dir, backup_dir, **kwargs):
        self.task_name  = task_name
        self.manager    = manager
        self.config     = config
        self.timer      = timer
        self.base_dir   = base_dir
        self.backup_dir = backup_dir
        self.args       = kwargs
        self.verbose    = self.config.verbose

        self.running   = False
        self.stopped   = False
        self.completed = False
        self.exit_code = 255

        self.thread_count          = None
        self.cpu_count             = cpu_count()
        self.compression_method    = 'none'
        self.compression_supported = ['none']
        self.timer_name            = self.__class__.__name__

        signal(SIGINT, SIG_IGN)
        signal(SIGTERM, self.close)
problems.py (project: onsager_deep_learning, author: mborgerding)
def random_access_problem(which=1):
    import raputil as ru
    if which == 1:
        opts = ru.Problem.scenario1()
    else:
        opts = ru.Problem.scenario2()

    p = ru.Problem(**opts)
    x1 = p.genX(1)
    y1 = p.fwd(x1)
    A = p.S
    M,N = A.shape
    nbatches = int(math.ceil(1000 /x1.shape[1]))
    prob = NumpyGenerator(p=p,nbatches=nbatches,A=A,opts=opts,iid=(which==1))
    if which==2:
        prob.maskX_ = tf.expand_dims( tf.constant( (np.arange(N) % (N//2) < opts['Nu']).astype(np.float32) ) , 1)

    _,prob.noise_var = p.add_noise(y1)

    unused = p.genYX(nbatches) # for legacy reasons -- want to compare against a previous run
    (prob.yval, prob.xval) = p.genYX(nbatches)
    (prob.yinit, prob.xinit) = p.genYX(nbatches)
    import multiprocessing as mp
    prob.nsubprocs = mp.cpu_count()
    return prob
worker.py (project: highfive, author: abau171)
def run_worker_pool(job_handler, host="localhost", port=48484,
                      *, max_workers=None):
    """
    Runs a pool of workers which connect to a remote HighFive master and begin
    executing calls.
    """

    if max_workers is None:
        max_workers = multiprocessing.cpu_count()

    processes = []
    for _ in range(max_workers):
        p = multiprocessing.Process(target=worker_main,
                args=(job_handler, host, port))
        p.start()
        processes.append(p)

    logger.debug("workers started")

    for p in processes:
        p.join()

    logger.debug("all workers completed")
_extractor.py (project: DataProperty, author: thombashi)
def to_dp_matrix(self, value_matrix):
        self.__update_dp_converter()
        logger.debug("max_workers = {}".format(self.max_workers))

        value_matrix = self.__strip_data_matrix(value_matrix)

        if self.__is_dp_matrix(value_matrix):
            logger.debug("already a dataproperty matrix")
            return value_matrix

        if not self.max_workers:
            self.max_workers = multiprocessing.cpu_count()

        if self.max_workers <= 1:
            return self.__to_dp_matrix_st(value_matrix)

        return self.__to_dp_matrix_mt(value_matrix)
clean_text.py (project: glassdoor-analysis, author: THEdavehogue)
def multi_scrub_text(reviews):
    '''
    Function to lemmatize text - utilizes multiprocessing for parallelization

    INPUT:
        reviews: array-like, pandas DataFrame column containing review texts

    OUTPUT:
        lemmatized: pandas DataFrame column with cleaned texts
    '''
    lemmatized = []
    cpus = cpu_count() - 1
    pool = Pool(processes=cpus)
    lemmatized = pool.map(lemmatize_text, reviews)
    pool.close()
    pool.join()
    return lemmatized
glassdoor_search.py (project: glassdoor-analysis, author: THEdavehogue)
def multi_core_scrape(num_pages, db_coll):
    '''
    Map the API scrape across number of processors - 1 for performance boost.

    INPUT:
        num_pages: int, number of pages to scrape
        db_coll: pymongo collection object, collection to add documents to

    OUTPUT:
        None, records inserted into MongoDB
    '''
    cpus = cpu_count() - 1
    pool = Pool(processes=cpus)
    pages = range(1, num_pages + 1)
    employers = pool.map(scrape_api_page, pages)
    pool.close()
    pool.join()
    print 'Inserting Employer Records into MongoDB . . .'
    pbar = ProgressBar()
    for page in pbar(employers):
        db_coll.insert_many(page)
cartconverter.py (project: GALEX, author: rahul-aedula95)
def degreetocart(data_f1):

    global df2
    df2 = data_f1.copy()
    print "phase 1"

    df2['X'] = np.nan
    df2['Y'] = np.nan
    df2['Z'] = np.nan
    df2 = df2.astype(float)

    print "phase 2"
    num_cores = multiprocessing.cpu_count()


    results_x = Parallel(n_jobs=num_cores)(delayed(xloop)(i) for i in xrange(0, len(df2)))
    print "phase 3"
    results_y = Parallel(n_jobs=num_cores)(delayed(yloop)(i) for i in xrange(0, len(df2)))
    print "phase 4"
    results_z = Parallel(n_jobs=num_cores)(delayed(zloop)(i) for i in xrange(0, len(df2)))
    print "phase 5"
    for i in xrange(0, len(df2)):
        print i
        # use .loc to avoid pandas chained-assignment pitfalls
        df2.loc[i, 'X'] = results_x[i]
        df2.loc[i, 'Y'] = results_y[i]
        df2.loc[i, 'Z'] = results_z[i]
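The three passes above each traverse the whole frame. Since joblib workers can return tuples, one parallel pass can compute all three coordinates; a hedged sketch under the assumption that xloop/yloop/zloop implement a spherical-to-Cartesian conversion (they are project helpers not shown here):

import numpy as np
from joblib import Parallel, delayed

def to_cart(ra_deg, dec_deg, r=1.0):
    ra, dec = np.radians(ra_deg), np.radians(dec_deg)
    return (r * np.cos(dec) * np.cos(ra),
            r * np.cos(dec) * np.sin(ra),
            r * np.sin(dec))

# n_jobs=-1 uses every core, matching multiprocessing.cpu_count()
coords = Parallel(n_jobs=-1)(delayed(to_cart)(ra, dec)
                             for ra, dec in [(10.0, 20.0), (30.0, 40.0)])
xs, ys, zs = zip(*coords)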
processing.py (project: keras-text, author: raghakot)
def _parse_spacy_kwargs(**kwargs):
    """Supported args include:

    Args:
        n_threads/num_threads: Number of threads to use. Uses num_cpus - 1 by default.
        batch_size: The number of texts to accumulate into a common working set before processing.
            (Default value: 1000)
    """
    n_threads = kwargs.get('n_threads') or kwargs.get('num_threads')
    batch_size = kwargs.get('batch_size')

    # use == rather than `is` for int comparison; identity of integers depends
    # on CPython's small-int cache and is not guaranteed
    if n_threads is None or n_threads == -1:
        n_threads = cpu_count() - 1
    if batch_size is None or batch_size == -1:
        batch_size = 1000
    return n_threads, batch_size
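A side note on the == fix above: `is` tests object identity, and CPython happens to cache only small integers, so identity comparisons against int values work by accident at best (Python 3.8+ even emits a SyntaxWarning for `is` with a literal). A quick illustration:

x = 1000
print(x == int('1000'))  # True: value equality
print(x is int('1000'))  # False in CPython: 1000 is outside the small-int cache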
dbench.py (project: avocado-misc-tests, author: avocado-framework)
def test(self):
        '''
        Test Execution with necessary args
        '''
        dir = self.params.get('dir', default='.')
        nprocs = self.params.get('nprocs', default=None)
        seconds = self.params.get('seconds', default=60)
        args = self.params.get('args', default='')
        if not nprocs:
            nprocs = multiprocessing.cpu_count()
        loadfile = os.path.join(self.srcdir, 'client.txt')
        cmd = '%s/dbench %s %s -D %s -c %s -t %d' % (self.srcdir, nprocs, args,
                                                     dir, loadfile, seconds)
        process.run(cmd)

        self.results = process.system_output(cmd)
        pattern = re.compile(r"Throughput (.*?) MB/sec (.*?) procs")
        (throughput, procs) = pattern.findall(self.results)[0]
        perf_json = {'throughput': throughput, 'procs': procs}
        output_path = os.path.join(self.outputdir, "perf.json")
        json.dump(perf_json, open(output_path, "w"))
snakemake.py (project: sequana, author: sequana)
def __init__(self, parent=None):
        super().__init__(parent=parent)


        self.ui = Ui_Snakemake()
        self.ui.setupUi(self)

        # This is for the --cluster-config case
        # Note the double underscore that is used later to be replaced by a dash
        self.ui.snakemake_options_cluster_cluster__config_value = FileBrowser()
        self.ui.horizontalLayout_4.addWidget(
            self.ui.snakemake_options_cluster_cluster__config_value)

        self._application = "sequana_gui"
        self._section = "snakemake_dialog"
        self.read_settings()

        # Set maximum of local cores to be used
        cpu = multiprocessing.cpu_count()
        self.ui.snakemake_options_local_cores_value.setMaximum(cpu)
wordembed.py (project: pytorch-skipthoughts, author: kaniblu)
def load_embeddings_mp(path, word_dim, processes=None):

    if processes is None:
        processes = multiprocessing.cpu_count()

    pool = mp.Pool(processes, initializer=_mp_initialize,
                   initargs=(word_dim,))

    with open(path, "r") as f:
        iterator = chunks(f, n=processes,
                          k=processes * 10000)
        ret = {}
        for batches in iterator:
            results = pool.map_async(_mp_process, batches)
            results = results.get()
            results = aggregate_dicts(*results)

            ret.update(results)

        return ret
utils.py (project: POT, author: rflamary)
def parmap(f, X, nprocs=multiprocessing.cpu_count()):
    """ paralell map for multiprocessing """
    q_in = multiprocessing.Queue(1)
    q_out = multiprocessing.Queue()

    proc = [multiprocessing.Process(target=fun, args=(f, q_in, q_out))
            for _ in range(nprocs)]
    for p in proc:
        p.daemon = True
        p.start()

    sent = [q_in.put((i, x)) for i, x in enumerate(X)]
    [q_in.put((None, None)) for _ in range(nprocs)]
    res = [q_out.get() for _ in range(len(sent))]

    [p.join() for p in proc]

    return [x for i, x in sorted(res)]
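The excerpt references a module-level `fun` that is not shown. In the classic parmap recipe this code follows, `fun` is the queue-draining worker, roughly (a sketch, not necessarily POT's exact code):

def fun(f, q_in, q_out):
    while True:
        i, x = q_in.get()
        if i is None:  # the (None, None) sentinel means no more work
            break
        q_out.put((i, f(x)))

Note also that the nprocs=multiprocessing.cpu_count() default is evaluated once, at import time, not per call.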
gunicorn.py (project: Dallinger, author: Dallinger)
def load_user_config(self):
        config = get_config()
        workers = config.get("threads")
        if workers == "auto":
            workers = str(multiprocessing.cpu_count() * 2 + 1)

        host = config.get("host")
        bind_address = "{}:{}".format(host, self.port)
        self.options = {
            'bind': bind_address,
            'workers': workers,
            'worker_class': WORKER_CLASS,
            'loglevels': self.loglevels,
            'loglevel': self.loglevels[config.get("loglevel")],
            'errorlog': '-',
            'accesslog': '-',
            'proc_name': 'dallinger_experiment_server',
            'limit_request_line': '0',
            'when_ready': when_ready,
        }
genetic.py (project: nimo, author: wolfram2012)
    def __init__(self, fitness, args=(), kwargs=None, population_size=100, n_processes="AUTO"):
        self.fitness = fitness
        self.args = args
        # avoid mutable default arguments; create a fresh dict per instance
        self.kwargs = {} if kwargs is None else kwargs
        self.population_size = population_size
        self.n_processes = n_processes
        if self.n_processes == "AUTO":
            self.n_processes = mp.cpu_count()

        self.run_data = None

        self.running_workers = 0

        self.best_score = np.inf
        self.population = []
        self.bests = []
        self.worsts = []
        self.history = []
        self.iter = 0
sswarm.py (project: swarm, author: a7vinx)
def get_do_task(self):
        proc=[]
        if self._args.process_num==0:
            for cur in range(multiprocessing.cpu_count()):
                p=multiprocessing.Process(target=self._get_do_task_proc)
                p.start()
                proc.append(p)
        else:
            for cur in range(self._args.process_num):
                p=multiprocessing.Process(target=self._get_do_task_proc)
                p.start()
                proc.append(p)
        # start a new thread to listen for commands from the master host;
        # it is a daemon thread, so we need not wait for it to exit
        t=threading.Thread(target=self._response_master)
        t.daemon=True
        t.start()
        for cur in proc:
            cur.join()
        LOG.debug('task completed')
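The two branches above differ only in the process count, with 0 meaning 'one worker per CPU'; the same logic folds into a few lines (a sketch with a hypothetical spawn_workers helper):

import multiprocessing

def spawn_workers(target, n=0):
    n = n or multiprocessing.cpu_count()  # 0 means one worker per CPU
    procs = [multiprocessing.Process(target=target) for _ in range(n)]
    for p in procs:
        p.start()
    return procs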
wikicorpus.py (project: paragraph2vec, author: thunlp)
def __init__(self, fname, processes=None, lemmatize=utils.HAS_PATTERN, dictionary=None, filter_namespaces=('0',)):
        """
        Initialize the corpus. Unless a dictionary is provided, this scans the
        corpus once, to determine its vocabulary.

        If `pattern` package is installed, use fancier shallow parsing to get
        token lemmas. Otherwise, use simple regexp tokenization. You can override
        this automatic logic by forcing the `lemmatize` parameter explicitly.

        """
        self.fname = fname
        self.filter_namespaces = filter_namespaces
        self.metadata = False
        if processes is None:
            processes = max(1, multiprocessing.cpu_count() - 1)
        self.processes = processes
        self.lemmatize = lemmatize
        if dictionary is None:
            self.dictionary = Dictionary(self.get_texts())
        else:
            self.dictionary = dictionary
run_benchmarks.py (project: veros, author: dionhaefner)
def parse_cli():
    parser = argparse.ArgumentParser(description="Run Veros benchmarks")
    parser.add_argument("-f", "--fortran-library", type=str, help="Path to pyOM2 fortran library")
    parser.add_argument("-s", "--sizes", nargs="*", type=float, required=True,
                        help="Problem sizes to test (total number of elements)")
    parser.add_argument("-c", "--components", nargs="*", choices=COMPONENTS, default=["numpy"], metavar="COMPONENT",
                        help="Numerical backend components to benchmark (possible values: {})".format(", ".join(COMPONENTS)))
    parser.add_argument("-n", "--nproc", type=int, default=multiprocessing.cpu_count(),
                        help="Number of processes / threads for parallel execution")
    parser.add_argument("-o", "--outfile", default="benchmark_{}.json".format(time.time()), help="JSON file to write timings to")
    parser.add_argument("-t", "--timesteps", default=1000, type=int, help="Number of time steps that each benchmark is run for")
    parser.add_argument("--only", nargs="*", default=AVAILABLE_BENCHMARKS,
                        help="Run only these benchmarks (possible values: {})".format(", ".join(AVAILABLE_BENCHMARKS)),
                        choices=AVAILABLE_BENCHMARKS, required=False, metavar="BENCHMARK")
    parser.add_argument("--mpiexec", default="mpiexec", help="Executable used for calling MPI (e.g. mpirun, mpiexec)")
    parser.add_argument("--slurm", action="store_true", help="Run benchmarks using SLURM scheduling command (srun)")
    parser.add_argument("--debug", action="store_true", help="Additionally print each command that is executed")
    parser.add_argument("--float-type", default="float64", help="Data type for floating point arrays in Veros components")
    parser.add_argument("--burnin", default=3, type=int, help="Number of iterations to exclude in timings")
    return parser.parse_args()
engine.py (project: demos, author: dfirence)
def run_concurrently( queue ):
    start = time.time()
    cpus  = mp.cpu_count()
    qsize = queue.qsize()
    procs = []
    with ProcessPoolExecutor( cpus ) as executor:
        for n in xrange( qsize ):
            proc = mp.Process( target=run_plugin, args=( queue.get(),) )
            procs.append( proc )
            proc.start()
            time.sleep( 0.05 )
        for proc in procs:
            proc.join()
            time.sleep( 0.05 )
    t = '{:.2f}s'.format(time.time() - start)
    end = '[+] Ends  [ {} ] Concurrent Tasks'.format(qsize)

    print ('\033[1;32;40m' + '{:35}--> {}{}'.format(end, t, '\n'))
    print '{}{}'.format('-' * 48, '\n')
    return
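Note that the ProcessPoolExecutor above is created but never given any work: the tasks run in manually spawned multiprocessing.Process objects inside its `with` block. If the executor is meant to do the work itself, the equivalent looks roughly like this (a sketch with a hypothetical `tasks` list standing in for the queue):

import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor

def run_concurrently(tasks, run_plugin):
    with ProcessPoolExecutor(mp.cpu_count()) as executor:
        list(executor.map(run_plugin, tasks))  # blocks until all tasks finish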
train_vector_model.py (project: MyCluster, author: yinminggang)
def trainWord2Vector(sentence_count, vector_dimension, train_count):

    lines, model_out, vector_out = "sources/splited_words.txt", "result/word2vec.model", "result/pre_word2vec.vector"
    logging.info("loading corpus and training word vectors")
    sentences = LineSentence(lines)
    # min_count drops words whose total frequency is below the threshold
    # (3 would drop rare words; here 0 keeps everything in word2vec.vector)
    # workers: number of training threads, one per CPU core (gensim default is 3)
    # sg=1 selects the skip-gram training algorithm
    model = Word2Vec(sentences, sg=1, size=vector_dimension, window=8,
                     min_count=0, workers=multiprocessing.cpu_count())
    # run several more training passes over the corpus
    for i in range(train_count):
        model.train(sentences=sentences, total_examples=sentence_count, epochs=model.iter)

    # trim unneeded model memory = use(much) less RAM
    # model.init_sims(replace=True)
    model.save(model_out)
    model.wv.save_word2vec_format(vector_out)

