def pdb_downloader_and_metadata(self, outdir=None, pdb_file_type=None, force_rerun=False):
    """Download ALL mapped experimental structures to each protein's structures directory.

    Args:
        outdir (str): Path to output directory, if GEM-PRO directories were not set or another output
            directory is desired
        pdb_file_type (str): Type of PDB file to download, if not already set or another format is desired
        force_rerun (bool): If files should be re-downloaded even if they already exist

    """
    if not pdb_file_type:
        pdb_file_type = self.pdb_file_type

    counter = 0
    for g in tqdm(self.genes):
        pdbs = g.protein.pdb_downloader_and_metadata(outdir=outdir, pdb_file_type=pdb_file_type,
                                                     force_rerun=force_rerun)
        if pdbs:
            counter += len(pdbs)

    log.info('Updated PDB metadata dataframe. See the "df_pdb_metadata" attribute for a summary dataframe.')
    log.info('Saved {} structures total'.format(counter))
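# --- Usage sketch (added; not part of the original source) ---
# A minimal, hypothetical way to call the GEM-PRO method above. The import path and
# constructor arguments are assumptions; only pdb_downloader_and_metadata() and the
# "df_pdb_metadata" attribute come from the snippet itself.
from ssbio.pipeline.gempro import GEMPRO

my_gempro = GEMPRO(gem_name='example_project', root_dir='/tmp/gempro',
                   genes_list=['b0720', 'b1241'], pdb_file_type='pdb')
# Sequences and structures must be mapped first (e.g. via the KEGG/UniProt mapping
# methods shown later in this collection) before structures can be downloaded.
my_gempro.pdb_downloader_and_metadata(force_rerun=False)
print(my_gempro.df_pdb_metadata.head())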
def tqdm_callback(N, notebook=True):
    """
    Return a :mod:`tqdm` progress bar expecting *N* iterations, suitable for
    Jupyter if *notebook* is true and for the terminal otherwise. The progress
    bar includes an additional method :func:`callback` (a function of one
    ignored parameter) meant to be passed as a callback function called to
    update the progress bar.
    """
    if notebook:
        progress_bar = tqdm.tqdm_notebook(total=N)
    else:
        progress_bar = tqdm.tqdm(total=N)

    def callback(self, i):
        self.update()

    progress_bar.callback = partial(callback, progress_bar)
    return progress_bar
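# --- Usage sketch (added; not part of the original source) ---
# A self-contained illustration of the same callback pattern: a one-argument
# callback bound to a bar, handed to a consumer that reports progress through it.
# The run_iterations() consumer is hypothetical; only tqdm and functools are assumed.
from functools import partial
import tqdm

bar = tqdm.tqdm(total=50)
bar.callback = partial(lambda pbar, i: pbar.update(), bar)

def run_iterations(n_iter, on_step):
    """Hypothetical consumer that reports progress via a one-argument callback."""
    for i in range(n_iter):
        on_step(i)  # the bar ignores `i` and simply advances by one

run_iterations(50, bar.callback)
bar.close()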
def __download_competition_file(self, competition, file_name, browser):
    url = 'https://www.kaggle.com/c/%s/download/%s' % (competition, file_name)
    res = browser.get(url, stream=True)
    total_size = int(res.headers.get('content-length', 0))
    if res.status_code != 200:
        print('error downloading %s' % file_name)
        return False

    file_name = os.path.basename(url)
    pbar = tqdm(total=total_size, unit='B', unit_scale=True, desc=file_name)
    chunk_size = 32 * 1024
    with open(file_name, 'wb') as file_handle:
        for data in res.iter_content(chunk_size):
            file_handle.write(data)
            pbar.update(len(data))  # count the bytes actually written; the last chunk may be short
    pbar.close()
    return True
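# --- Pattern sketch (added; not part of the original source) ---
# The same streamed-download-with-progress pattern as a standalone helper using
# requests; the function name and defaults are assumptions, not the original API.
import os
import requests
from tqdm import tqdm

def download_with_progress(url, dest=None, chunk_size=32 * 1024):
    """Stream a file to disk while updating a byte-accurate tqdm bar."""
    dest = dest or os.path.basename(url)
    res = requests.get(url, stream=True)
    res.raise_for_status()
    total_size = int(res.headers.get('content-length', 0))
    with open(dest, 'wb') as fh, tqdm(total=total_size, unit='B',
                                      unit_scale=True, desc=dest) as pbar:
        for data in res.iter_content(chunk_size):
            fh.write(data)
            pbar.update(len(data))
    return dest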
def reset(self, stream=None):
    """ Reset the progress bar(s) """
    logger.debug("Update stream descriptor for progress bars.")
    if stream is not None:
        self.stream = stream
    if self.stream is None:
        logger.warning("No stream is associated with the progress bars!")
        self.axes = []
    else:
        self.axes = self.stream.descriptor.axes
    self.num = min(self.num, len(self.axes))

    self.totals = [self.stream.descriptor.num_points_through_axis(axis) for axis in range(self.num)]
    self.chunk_sizes = [max(1, self.stream.descriptor.num_points_through_axis(axis + 1)) for axis in range(self.num)]

    logger.debug("Reset the progress bars to initial states.")
    self.bars = []
    for i in range(self.num):
        if self.notebook:
            self.bars.append(tqdm_notebook(total=self.totals[i] / self.chunk_sizes[i]))
        else:
            self.bars.append(tqdm(total=self.totals[i] / self.chunk_sizes[i]))
def update(self):
    """ Update the status of the progress bar(s) """
    if self.stream is None:
        logger.warning("No stream is associated with the progress bars!")
        num_data = 0
    else:
        num_data = self.stream.points_taken
    logger.debug("Update the progress bars.")
    for i in range(self.num):
        if num_data == 0:
            # Reset the progress bar with a new one
            if self.notebook:
                self.bars[i].sp(close=True)
                self.bars[i] = tqdm_notebook(total=self.totals[i] / self.chunk_sizes[i])
            else:
                self.bars[i].close()
                self.bars[i] = tqdm(total=self.totals[i] / self.chunk_sizes[i])
        pos = int(10 * num_data / self.chunk_sizes[i]) / 10.0  # One decimal is good enough
        if pos > self.bars[i].n:
            self.bars[i].update(pos - self.bars[i].n)
        num_data = num_data % self.chunk_sizes[i]
def ka_bagging_2class_or_reg_lgbm(X_train, y_train, seed, bag_round, params,
                                  X_test, using_notebook=True, num_boost_round=0):
    '''
    early version
    '''
    # create an array object to hold the predictions
    baggedpred = np.zeros(shape=X_test.shape[0]).astype(np.float32)
    # loop for as many times as we want bags
    if using_notebook:
        for n in tqdm_notebook(range(0, bag_round)):
            # shuffle first, aids in increasing variance and forces different results
            X_train, y_train = shuffle(X_train, y_train, random_state=seed + n)
            params['seed'] = seed + n
            model = lightgbm.train(params, lightgbm.Dataset(X_train, y_train), num_boost_round=num_boost_round)
            pred = model.predict(X_test)
            baggedpred += pred / bag_round
    return baggedpred
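# --- Usage sketch (added; not part of the original source) ---
# A hedged example of calling the bagging helper above on synthetic data. The
# parameter values and toy data are illustrative assumptions; using_notebook=True
# is required because the snippet only implements the notebook branch.
import numpy as np

rng = np.random.RandomState(0)
X_train = rng.rand(500, 10)
y_train = (X_train[:, 0] + 0.1 * rng.randn(500) > 0.5).astype(int)
X_test = rng.rand(100, 10)

params = {'objective': 'binary', 'learning_rate': 0.1, 'num_leaves': 31, 'verbose': -1}
pred = ka_bagging_2class_or_reg_lgbm(X_train, y_train, seed=42, bag_round=5,
                                     params=params, X_test=X_test,
                                     using_notebook=True, num_boost_round=100)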
def compute_sgd(data):
    logging.info('Computing SGD')
    n_splits = 10
    folder = StratifiedKFold(n_splits=n_splits, shuffle=True)
    for ix_first, ix_second in tqdm_notebook(folder.split(np.zeros(data['y_train'].shape[0]), data['y_train']),
                                             total=n_splits):
        # {'en__l1_ratio': 0.0001, 'en__alpha': 1e-05}
        model = SGDClassifier(
            loss='log',
            penalty='elasticnet',
            fit_intercept=True,
            n_iter=100,
            shuffle=True,
            n_jobs=-1,
            l1_ratio=0.0001,
            alpha=1e-05,
            class_weight=None)
        model = model.fit(data['X_train'][ix_first, :], data['y_train'][ix_first])
        data['y_train_pred'][ix_second] = logit(model.predict_proba(data['X_train'][ix_second, :])[:, 1])
        data['y_test_pred'].append(logit(model.predict_proba(data['X_test'])[:, 1]))
    data['y_test_pred'] = np.array(data['y_test_pred']).T.mean(axis=1)
    return data
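# --- Pattern sketch (added; not part of the original source) ---
# The loss='log' and n_iter arguments above reflect an older scikit-learn API.
# Below is a minimal sketch of the same out-of-fold scheme against a recent
# scikit-learn (loss='log_loss' and max_iter apply to scikit-learn >= 1.1); the
# toy data and fold count are assumptions.
import numpy as np
from scipy.special import logit
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import StratifiedKFold

rng = np.random.RandomState(0)
X = rng.rand(200, 5)
y = (X[:, 0] > 0.5).astype(int)
X_test = rng.rand(50, 5)

oof = np.zeros(len(y))   # out-of-fold train predictions (logit scale)
test_folds = []          # per-fold test predictions, averaged afterwards
for tr_ix, va_ix in StratifiedKFold(n_splits=5, shuffle=True, random_state=0).split(X, y):
    model = SGDClassifier(loss='log_loss', penalty='elasticnet',
                          max_iter=100, l1_ratio=0.0001, alpha=1e-05)
    model.fit(X[tr_ix], y[tr_ix])
    oof[va_ix] = logit(model.predict_proba(X[va_ix])[:, 1])
    test_folds.append(logit(model.predict_proba(X_test)[:, 1]))
test_pred = np.mean(test_folds, axis=0)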
def download_patric_genomes(self, ids, force_rerun=False):
    """Download genome files from PATRIC given a list of PATRIC genome IDs and load them as strains.

    Args:
        ids (str, list): PATRIC ID or list of PATRIC IDs
        force_rerun (bool): If genome files should be downloaded again even if they exist

    """
    ids = ssbio.utils.force_list(ids)

    counter = 0
    log.info('Downloading sequences from PATRIC...')
    for patric_id in tqdm(ids):
        f = ssbio.databases.patric.download_coding_sequences(patric_id=patric_id, seqtype='protein',
                                                             outdir=self.sequences_by_organism_dir,
                                                             force_rerun=force_rerun)
        if f:
            self.load_strain(patric_id, f)
            counter += 1
            log.debug('{}: downloaded sequence'.format(patric_id))
        else:
            log.warning('{}: unable to download sequence'.format(patric_id))

    log.info('Created {} new strain GEM-PROs, accessible at "strains" attribute'.format(counter))
def build_strain_specific_models(self, save_models=False):
    """Using the orthologous genes matrix, create and modify the strain-specific models based on whether
    orthologous genes exist.

    Also store the sequences directly in the reference GEM-PRO protein sequence attribute for the strains.
    """
    if len(self.df_orthology_matrix) == 0:
        raise RuntimeError('Empty orthology matrix')

    # Build each strain model starting from an emptied copy of the reference GEM-PRO
    for strain_gempro in tqdm(self.strains):
        log.debug('{}: building strain specific model'.format(strain_gempro.id))

        # For each genome, load the metabolic model or genes from the reference GEM-PRO
        logging.disable(logging.WARNING)
        if self._empty_reference_gempro.model:
            strain_gempro.load_cobra_model(self._empty_reference_gempro.model)
        elif self._empty_reference_gempro.genes:
            strain_gempro.genes = [x.id for x in self._empty_reference_gempro.genes]
        logging.disable(logging.NOTSET)

        # Get a list of genes which do not have orthology in the strain
        not_in_strain = self.df_orthology_matrix[pd.isnull(self.df_orthology_matrix[strain_gempro.id])][strain_gempro.id].index.tolist()

        # Mark genes non-functional
        self._pare_down_model(strain_gempro=strain_gempro, genes_to_remove=not_in_strain)

        # Load sequences into the base and strain models
        self._load_strain_sequences(strain_gempro=strain_gempro)

        if save_models:
            cobra.io.save_json_model(model=strain_gempro.model,
                                     filename=op.join(self.model_dir, '{}.json'.format(strain_gempro.id)))
            strain_gempro.save_pickle(op.join(self.model_dir, '{}_gp.pckl'.format(strain_gempro.id)))

    log.info('Created {} new strain-specific models and loaded in sequences'.format(len(self.strains)))
def align_orthologous_genes_pairwise(self, gapopen=10, gapextend=0.5):
    """For each gene in the base strain, run a pairwise alignment for all orthologous gene sequences to it."""
    for ref_gene in tqdm(self.reference_gempro.genes):
        if len(ref_gene.protein.sequences) > 1:
            alignment_dir = op.join(self.sequences_by_gene_dir, ref_gene.id)
            if not op.exists(alignment_dir):
                os.mkdir(alignment_dir)
            ref_gene.protein.pairwise_align_sequences_to_representative(gapopen=gapopen, gapextend=gapextend,
                                                                        outdir=alignment_dir, parse=True)
def set_representative_sequence(self, force_rerun=False):
    """Automatically consolidate loaded sequences (manual, UniProt, or KEGG) and set a single representative sequence.

    Manually set representative sequences override all existing mappings. UniProt mappings override KEGG mappings
    except when KEGG mappings have PDBs associated with them and UniProt doesn't.

    Args:
        force_rerun (bool): Set to True to recheck stored sequences

    """
    # TODO: rethink use of multiple database sources - may lead to inconsistency with genome sources
    sequence_missing = []
    successfully_mapped_counter = 0
    for g in tqdm(self.genes):
        repseq = g.protein.set_representative_sequence(force_rerun=force_rerun)
        if not repseq:
            sequence_missing.append(g.id)
        elif not repseq.sequence_file:
            sequence_missing.append(g.id)
        else:
            successfully_mapped_counter += 1

    log.info('{}/{}: number of genes with a representative sequence'.format(len(self.genes_with_a_representative_sequence),
                                                                            len(self.genes)))
    log.info('See the "df_representative_sequences" attribute for a summary dataframe.')
def get_sequence_properties(self, representatives_only=True):
    """Run Biopython ProteinAnalysis and EMBOSS pepstats to summarize basic statistics of all protein sequences.

    Results are stored in the protein's respective SeqProp objects at ``.annotations``

    Args:
        representatives_only (bool): If analysis should only be run on the representative sequences

    """
    for g in tqdm(self.genes):
        g.protein.get_sequence_properties(representative_only=representatives_only)
def blast_seqs_to_pdb(self, seq_ident_cutoff=0, evalue=0.0001, all_genes=False, display_link=False,
                      outdir=None, force_rerun=False):
    """BLAST each representative protein sequence to the PDB. Saves raw BLAST results (XML files).

    Args:
        seq_ident_cutoff (float, optional): Cutoff results based on percent coverage (in decimal form)
        evalue (float, optional): Cutoff for the E-value - filters for significant hits. 0.001 is liberal,
            0.0001 is stringent (default).
        all_genes (bool): If all genes should be BLASTed, or only those without any structures currently mapped
        display_link (bool, optional): Set to True if links to the HTML results should be displayed
        outdir (str): Path to output directory of downloaded files, must be set if GEM-PRO directories
            were not created initially
        force_rerun (bool, optional): If existing BLAST results should not be used, set to True. Default is False.

    """
    counter = 0
    for g in tqdm(self.genes_with_a_representative_sequence):
        # If all_genes=False, BLAST only genes without a UniProt -> PDB mapping
        if g.protein.num_structures_experimental > 0 and not all_genes and not force_rerun:
            log.debug('{}: skipping BLAST, {} experimental structures already mapped '
                      'and all_genes flag is False'.format(g.id, g.protein.num_structures_experimental))
            continue

        # BLAST the sequence to the PDB
        new_pdbs = g.protein.blast_representative_sequence_to_pdb(seq_ident_cutoff=seq_ident_cutoff,
                                                                  evalue=evalue,
                                                                  display_link=display_link,
                                                                  outdir=outdir,
                                                                  force_rerun=force_rerun)
        if new_pdbs:
            counter += 1
            log.debug('{}: {} PDBs BLASTed'.format(g.id, len(new_pdbs)))
        else:
            log.debug('{}: no BLAST results'.format(g.id))

    log.info('Completed sequence --> PDB BLAST. See the "df_pdb_blast" attribute for a summary dataframe.')
    log.info('{}: number of genes with additional structures added from BLAST'.format(counter))
def get_dssp_annotations(self, representatives_only=True, force_rerun=False):
    """Run DSSP on structures and store calculations.

    Annotations are stored in the protein structure's chain sequence at:
    ``<chain_prop>.seq_record.letter_annotations['*-dssp']``

    Args:
        representatives_only (bool): If analysis should only be run on the representative structure
        force_rerun (bool): If calculations should be rerun even if an output file exists

    """
    for g in tqdm(self.genes):
        g.protein.get_dssp_annotations(representative_only=representatives_only, force_rerun=force_rerun)
def get_msms_annotations(self, representatives_only=True, force_rerun=False):
    """Run MSMS on structures and store calculations.

    Annotations are stored in the protein structure's chain sequence at:
    ``<chain_prop>.seq_record.letter_annotations['*-msms']``

    Args:
        representatives_only (bool): If analysis should only be run on the representative structure
        force_rerun (bool): If calculations should be rerun even if an output file exists

    """
    for g in tqdm(self.genes):
        g.protein.get_msms_annotations(representative_only=representatives_only, force_rerun=force_rerun)
def find_disulfide_bridges(self, representatives_only=True):
    """Run Biopython's disulfide bridge finder and store found bridges.

    Annotations are stored in the protein structure's chain sequence at:
    ``<chain_prop>.seq_record.annotations['SSBOND-biopython']``

    Args:
        representatives_only (bool): If analysis should only be run on the representative structure

    """
    for g in tqdm(self.genes):
        g.protein.find_disulfide_bridges(representative_only=representatives_only)
### END STRUCTURE RELATED METHODS ###
####################################################################################################################
def download_progress(res, stream, save_name):
    total_size = int(res.headers.get('content-length', 0))
    chunk_size = 32 * 1024
    pbar = tqdm(total=total_size, unit='B', unit_scale=True, desc=save_name)
    for data in res.iter_content(chunk_size):
        stream.write(data)
        pbar.update(len(data))  # count the bytes actually written; the last chunk may be short
    pbar.close()
def upload_data(gpu_ip, job_hash, data_path):
    url = 'http://%s:%s/runJobDecorator' % (gpu_ip, settings.GPU_PORT)
    file_size = path.getsize(data_path)
    pbar = tqdm(total=file_size, unit='B', unit_scale=True)

    def callback(monitor):
        progress = monitor.bytes_read - callback.last_bytes_read
        pbar.update(progress)
        callback.last_bytes_read = monitor.bytes_read
    callback.last_bytes_read = 0

    with open(data_path, 'rb') as f:
        data = {
            'file': ('uploads.pkl', f, 'application/octet-stream'),
            'hash': job_hash
        }
        encoder = MultipartEncoder(fields=data)
        monitor = MultipartEncoderMonitor(encoder, callback)
        r = requests.post(url, data=monitor,
                          headers={'Content-Type': monitor.content_type})

    remove(data_path)
    # pbar might not close when the user interrupts, need to fix this
    pbar.close()
    status_check(r)
def download_and_unzip_result(url, job_hash):
    r = requests.get(url, stream=True)
    status_check(r)

    total_size = int(r.headers.get('content-length', 0))
    with open('download.zip', 'wb') as f:
        pbar = tqdm(total=total_size, unit='B', unit_scale=True)
        chunk_size = 1024 * 32  # 32 KB
        for data in r.iter_content(chunk_size):
            f.write(data)
            pbar.update(len(data))  # count the bytes actually written
    # again there might be a pbar issue here
    pbar.close()

    zip_content = open("download.zip", "rb").read()
    z = ZipFile(io.BytesIO(zip_content))
    z.extractall()
    remove('download.zip')

    result = None     # output of the script
    new_files = None  # names of new files created by the script

    pickle_path = path.abspath(path.join(job_hash, job_hash + '.pkl'))
    if path.isfile(pickle_path):
        with open(pickle_path, 'rb') as f:
            # Hack: a workaround for dill's pickling problem
            # import_all()
            result = dill.load(f)
            # unimport_all()
        remove(pickle_path)

    if path.isdir(job_hash):
        new_files = listdir(job_hash)
        for name in new_files:
            rename(path.join(job_hash, name), name)
        rmtree(job_hash)

    return result, new_files
async def run(self):
    self.stream = self.sink.input_streams[0]
    axes = self.stream.descriptor.axes
    num_axes = len(axes)
    totals = [self.stream.descriptor.num_points_through_axis(axis) for axis in range(num_axes)]
    chunk_sizes = [max(1, self.stream.descriptor.num_points_through_axis(axis + 1)) for axis in range(num_axes)]
    self.num = min(self.num, num_axes)

    self.bars = []
    for i in range(self.num):
        if self.notebook:
            self.bars.append(tqdm_notebook(total=totals[i] / chunk_sizes[i]))
        else:
            self.bars.append(tqdm(total=totals[i] / chunk_sizes[i]))

    self.w_id = 0
    while True:
        if self.stream.done() and self.w_id == self.stream.num_points():
            break

        new_data = np.array(await self.stream.queue.get()).flatten()
        while self.stream.queue.qsize() > 0:
            new_data = np.append(new_data, np.array(self.stream.queue.get_nowait()).flatten())
        self.w_id += new_data.size

        num_data = self.stream.points_taken
        for i in range(self.num):
            if num_data == 0:
                # Reset the progress bar with a new one
                if self.notebook:
                    self.bars[i].sp(close=True)
                    self.bars[i] = tqdm_notebook(total=totals[i] / chunk_sizes[i])
                else:
                    self.bars[i].close()
                    self.bars[i] = tqdm(total=totals[i] / chunk_sizes[i])
            pos = int(10 * num_data / chunk_sizes[i]) / 10.0  # One decimal is good enough
            if pos > self.bars[i].n:
                self.bars[i].update(pos - self.bars[i].n)
            num_data = num_data % chunk_sizes[i]
def _pbar(iterable, desc, leave=True, position=None, verbose='progressbar'):
    if verbose == 'progressbar':
        from mne.utils import ProgressBar
        _ProgressBar = ProgressBar
        if not version_is_greater_equal('mne', '0.14'):
            class _ProgressBar(ProgressBar):
                def __iter__(self):
                    """Iterate to auto-increment the pbar with 1."""
                    self.max_value = len(iterable)
                    for obj in iterable:
                        yield obj
                        self.update_with_increment_value(1)

        pbar = _ProgressBar(iterable, mesg=desc, spinner=True)
    elif verbose == 'tqdm':
        from tqdm import tqdm
        pbar = tqdm(iterable, desc=desc, leave=leave, position=position,
                    dynamic_ncols=True)
    elif verbose == 'tqdm_notebook':
        from tqdm import tqdm_notebook
        pbar = tqdm_notebook(iterable, desc=desc, leave=leave,
                             position=position, dynamic_ncols=True)
    elif verbose is False:
        pbar = iterable
    return pbar
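# --- Usage sketch (added; not part of the original source) ---
# How the dispatcher above might be called; the iterable and description strings
# are placeholders, and _pbar is assumed to be in scope.
items = range(100)

for item in _pbar(items, desc='processing', verbose='tqdm'):
    pass  # work on each item with a terminal tqdm bar

for item in _pbar(items, desc='processing', verbose=False):
    pass  # same loop with no progress reporting at all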
def augment(X, Y, n=5, elastic_alpha_range=20, rotation_range=30,
            shift_x_range=0.1, shift_y_range=0.1):
    """I admit this code is very ugly (much worse than the Keras ImageDataGenerator API).
    Note to myself: don't reinvent the wheel...
    """
    X_transformed = np.zeros([X.shape[0] * n * 3] + list(X.shape[1:]))
    Y_transformed = np.zeros([X.shape[0] * n * 3] + list(X.shape[1:]))
    z = 0
    for i in tnrange(n):
        for j, (XX, YY) in tqdm_notebook(enumerate(zip(X, Y)), total=len(X), disable=False, leave=False):
            X_transformed[z], Y_transformed[z] = elastic_transform([XX, YY], elastic_alpha_range, sigma=10)
            z += 1
        for j, (XX, YY) in tqdm_notebook(enumerate(zip(X, Y)), total=len(X), disable=False, leave=False):
            X_transformed[z], Y_transformed[z] = random_rotation([XX, YY], rg=rotation_range,
                                                                 row_index=1, col_index=2,
                                                                 channel_index=0,
                                                                 fill_mode='nearest', cval=0.)
            z += 1
        for j, (XX, YY) in tqdm_notebook(enumerate(zip(X, Y)), total=len(X), disable=False, leave=False):
            X_transformed[z], Y_transformed[z] = random_shift([XX, YY], shift_x_range, shift_y_range,
                                                              row_index=1, col_index=2,
                                                              channel_index=0,
                                                              fill_mode='nearest', cval=0.)
            z += 1
    return X_transformed, Y_transformed
def verboserate(iterable, *args, **kwargs):
    """Iterate verbosely.

    Args:
        desc (str): prefix for the progress bar
        total (int): total length of the iterable

    See more options for tqdm.tqdm.
    """
    progress = tqdm_notebook if in_ipython() else tqdm
    for val in progress(iterable, *args, **kwargs):
        yield val
def on_epoch_begin(self, net, X=None, X_valid=None, **kwargs):
    # Assume it is a number until proven otherwise.
    batches_per_epoch = self.batches_per_epoch
    if self.batches_per_epoch == 'auto':
        batches_per_epoch = self._get_batches_per_epoch(net, X, X_valid)
    elif self.batches_per_epoch == 'count':
        # No limit is known until the end of the first epoch.
        batches_per_epoch = None

    if self._use_notebook():
        self.pbar = tqdm.tqdm_notebook(total=batches_per_epoch)
    else:
        self.pbar = tqdm.tqdm(total=batches_per_epoch)
def kegg_mapping_and_metadata(self, kegg_organism_code, custom_gene_mapping=None, outdir=None,
                              set_as_representative=False, force_rerun=False):
    """Map all genes in the model to KEGG IDs using the KEGG service.

    Steps:
        1. Download all metadata and sequence files in the sequences directory
        2. Creates a KEGGProp object in the protein.sequences attribute
        3. Returns a Pandas DataFrame of mapping results

    Args:
        kegg_organism_code (str): The three-letter KEGG code of your organism
        custom_gene_mapping (dict): If your model genes differ from the gene IDs you want to map,
            custom_gene_mapping allows you to input a dictionary which maps model gene IDs to new ones.
            Dictionary keys must match model gene IDs.
        outdir (str): Path to output directory of downloaded files, must be set if GEM-PRO directories
            were not created initially
        set_as_representative (bool): If mapped KEGG IDs should be set as representative sequences
        force_rerun (bool): If you want to overwrite any existing mappings and files

    """
    # First map all of the organism's KEGG genes to UniProt
    kegg_to_uniprot = ssbio.databases.kegg.map_kegg_all_genes(organism_code=kegg_organism_code, target_db='uniprot')

    successfully_mapped_counter = 0
    for g in tqdm(self.genes):
        if custom_gene_mapping:
            kegg_g = custom_gene_mapping[g.id]
        else:
            kegg_g = g.id

        # Download both FASTA and KEGG metadata files
        kegg_prop = g.protein.load_kegg(kegg_id=kegg_g, kegg_organism_code=kegg_organism_code,
                                        download=True, outdir=outdir, set_as_representative=set_as_representative,
                                        force_rerun=force_rerun)

        # Update a potentially old UniProt ID
        if kegg_g in kegg_to_uniprot.keys():
            kegg_prop.uniprot = kegg_to_uniprot[kegg_g]
            if g.protein.representative_sequence:
                if g.protein.representative_sequence.kegg == kegg_prop.kegg:
                    g.protein.representative_sequence.uniprot = kegg_to_uniprot[kegg_g]

        # Keep track of missing mappings - missing is defined by no available sequence
        if kegg_prop.sequence_file:
            successfully_mapped_counter += 1

        log.debug('{}: loaded KEGG information for gene'.format(g.id))

    log.info('{}/{}: number of genes mapped to KEGG'.format(successfully_mapped_counter, len(self.genes)))
    log.info('Completed ID mapping --> KEGG. See the "df_kegg_metadata" attribute for a summary dataframe.')
def uniprot_mapping_and_metadata(self, model_gene_source, custom_gene_mapping=None, outdir=None,
                                 set_as_representative=False, force_rerun=False):
    """Map all genes in the model to UniProt IDs using the UniProt mapping service.
    Also download all metadata and sequences.

    Args:
        model_gene_source (str): The database source of your model gene IDs.
            See: http://www.uniprot.org/help/api_idmapping
            Common model gene sources are:

            * Ensembl Genomes - ``ENSEMBLGENOME_ID`` (i.e. E. coli b-numbers)
            * Entrez Gene (GeneID) - ``P_ENTREZGENEID``
            * RefSeq Protein - ``P_REFSEQ_AC``

        custom_gene_mapping (dict): If your model genes differ from the gene IDs you want to map,
            custom_gene_mapping allows you to input a dictionary which maps model gene IDs to new ones.
            Dictionary keys must match model genes.
        outdir (str): Path to output directory of downloaded files, must be set if GEM-PRO directories
            were not created initially
        set_as_representative (bool): If mapped UniProt IDs should be set as representative sequences
        force_rerun (bool): If you want to overwrite any existing mappings and files

    """
    # Allow model gene --> custom ID mapping ({'TM_1012': 'TM1012'})
    if custom_gene_mapping:
        genes_to_map = list(custom_gene_mapping.values())
    else:
        genes_to_map = [x.id for x in self.genes]

    # Map all IDs first to available UniProt entries
    genes_to_uniprots = bs_unip.mapping(fr=model_gene_source, to='ACC', query=genes_to_map)

    successfully_mapped_counter = 0
    for g in tqdm(self.genes):
        if custom_gene_mapping and g.id in custom_gene_mapping.keys():
            uniprot_gene = custom_gene_mapping[g.id]
        else:
            uniprot_gene = g.id

        if uniprot_gene not in list(genes_to_uniprots.keys()):
            log.debug('{}: unable to map to UniProt'.format(g.id))
        else:
            for mapped_uniprot in genes_to_uniprots[uniprot_gene]:
                try:
                    uniprot_prop = g.protein.load_uniprot(uniprot_id=mapped_uniprot, download=True, outdir=outdir,
                                                          set_as_representative=set_as_representative,
                                                          force_rerun=force_rerun)
                except HTTPError as e:
                    log.error('{}, {}: unable to complete web request'.format(g.id, mapped_uniprot))
                    print(e)
                    continue

            if uniprot_prop.sequence_file or uniprot_prop.metadata_file:
                successfully_mapped_counter += 1

    log.info('{}/{}: number of genes mapped to UniProt'.format(successfully_mapped_counter, len(self.genes)))
    log.info('Completed ID mapping --> UniProt. See the "df_uniprot_metadata" attribute for a summary dataframe.')
def get_scratch_predictions(self, path_to_scratch, results_dir, scratch_basename='scratch', num_cores=1,
                            exposed_buried_cutoff=25, custom_gene_mapping=None):
    """Run and parse ``SCRATCH`` results to predict secondary structure and solvent accessibility.

    Annotations are stored in the protein's representative sequence at:

        * ``.annotations``
        * ``.letter_annotations``

    Args:
        path_to_scratch (str): Path to the SCRATCH executable
        results_dir (str): Path to the SCRATCH results folder, which will have the files (scratch.ss, scratch.ss8,
            scratch.acc, scratch.acc20)
        scratch_basename (str): Basename of the SCRATCH results ('scratch' is the default)
        num_cores (int): Number of cores to use to parallelize the SCRATCH run
        exposed_buried_cutoff (int): Cutoff of exposed/buried for the acc20 predictions
        custom_gene_mapping (dict): Default parsing of SCRATCH output files is to look for the model gene IDs. If
            your output files contain IDs which differ from the model gene IDs, use this dictionary to map model
            gene IDs to result file IDs. Dictionary keys must match model genes.

    """
    if not self.genome_path:
        # Write all sequences as one file
        all_seqs = self.write_representative_sequences_file(outname=self.id)

    # Runs SCRATCH or loads existing results in results_dir
    scratch = SCRATCH(project_name=scratch_basename, seq_file=self.genome_path)
    scratch.run_scratch(path_to_scratch=path_to_scratch, num_cores=num_cores, outdir=results_dir)

    counter = 0
    # Add the SCRATCH annotations to the representative_sequences letter_annotations
    for g in tqdm(self.genes_with_a_representative_sequence):
        if custom_gene_mapping:
            g_id = custom_gene_mapping[g.id]
        else:
            g_id = g.id

        if g_id in scratch.sspro_summary():
            # Secondary structure
            g.protein.representative_sequence.annotations.update(scratch.sspro_summary()[g_id])
            g.protein.representative_sequence.annotations.update(scratch.sspro8_summary()[g_id])
            g.protein.representative_sequence.letter_annotations['SS-sspro'] = scratch.sspro_results()[g_id]
            g.protein.representative_sequence.letter_annotations['SS-sspro8'] = scratch.sspro8_results()[g_id]

            # Solvent accessibility
            g.protein.representative_sequence.annotations.update(scratch.accpro_summary()[g_id])
            g.protein.representative_sequence.annotations.update(scratch.accpro20_summary(exposed_buried_cutoff)[g_id])
            g.protein.representative_sequence.letter_annotations['RSA-accpro'] = scratch.accpro_results()[g_id]
            g.protein.representative_sequence.letter_annotations['RSA-accpro20'] = scratch.accpro20_results()[g_id]

            counter += 1
        else:
            log.error('{}: missing SCRATCH results'.format(g.id))

    log.info('{}/{}: number of genes with SCRATCH predictions loaded'.format(counter, len(self.genes)))
def get_tmhmm_predictions(self, tmhmm_results, custom_gene_mapping=None):
    """Parse TMHMM results and store them in the representative sequences.

    This is a basic function to parse pre-run TMHMM results. Run TMHMM from the
    web service (http://www.cbs.dtu.dk/services/TMHMM/) by doing the following:

        1. Write all representative sequences in the GEM-PRO using the function ``write_representative_sequences_file``
        2. Upload the file to http://www.cbs.dtu.dk/services/TMHMM/ and choose "Extensive, no graphics" as the output
        3. Copy and paste the results (ignoring the top header and everything above "HELP with output formats") into a file and save it
        4. Run this function on that file

    Args:
        tmhmm_results (str): Path to TMHMM results (long format)
        custom_gene_mapping (dict): Default parsing of TMHMM output is to look for the model gene IDs. If
            your output file contains IDs which differ from the model gene IDs, use this dictionary to map model
            gene IDs to result file IDs. Dictionary keys must match model genes.

    """
    # TODO: refactor to Protein class?
    tmhmm_dict = ssbio.protein.sequence.properties.tmhmm.parse_tmhmm_long(tmhmm_results)

    counter = 0
    for g in tqdm(self.genes_with_a_representative_sequence):
        if custom_gene_mapping:
            g_id = custom_gene_mapping[g.id]
        else:
            g_id = g.id

        if g_id in tmhmm_dict:
            log.debug('{}: loading TMHMM results'.format(g.id))
            if not tmhmm_dict[g_id]:
                log.error("{}: missing TMHMM results".format(g.id))
                continue
            g.protein.representative_sequence.annotations['num_tm_helix-tmhmm'] = tmhmm_dict[g_id]['num_tm_helices']
            g.protein.representative_sequence.letter_annotations['TM-tmhmm'] = tmhmm_dict[g_id]['sequence']
            counter += 1
        else:
            log.error("{}: missing TMHMM results".format(g.id))

    log.info('{}/{}: number of genes with TMHMM predictions loaded'.format(counter, len(self.genes)))
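# --- Workflow sketch (added; not part of the original source) ---
# A hedged sketch of the four-step workflow described in the docstring above. The
# my_gempro object and the results file path are placeholders; the method names
# come from the docstrings in this collection.
# 1. Write all representative sequences to a single FASTA file.
seq_file = my_gempro.write_representative_sequences_file(outname=my_gempro.id)

# 2./3. Submit seq_file to http://www.cbs.dtu.dk/services/TMHMM/ ("Extensive, no graphics"),
#       then save the long-format results to a local text file.
tmhmm_results_file = '/path/to/tmhmm_long_output.txt'  # placeholder path

# 4. Parse and store the predictions in the representative sequences.
my_gempro.get_tmhmm_predictions(tmhmm_results=tmhmm_results_file)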
### END SEQUENCE RELATED METHODS ###
####################################################################################################################
####################################################################################################################
### STRUCTURE RELATED METHODS ###
def get_itasser_models(self, homology_raw_dir, custom_itasser_name_mapping=None, outdir=None, force_rerun=False):
    """Copy generated I-TASSER models from a directory to the GEM-PRO directory.

    Args:
        homology_raw_dir (str): Root directory of I-TASSER folders.
        custom_itasser_name_mapping (dict): Use this if your I-TASSER folder names differ from your model gene names.
            Input a dict of {model_gene: ITASSER_folder}.
        outdir (str): Path to output directory of downloaded files, must be set if GEM-PRO directories
            were not created initially
        force_rerun (bool): If homology files should be copied again even if they exist in the GEM-PRO directory

    """
    counter = 0
    for g in tqdm(self.genes):
        if custom_itasser_name_mapping and g.id in custom_itasser_name_mapping:
            hom_id = custom_itasser_name_mapping[g.id]
            if not op.exists(op.join(homology_raw_dir, hom_id)):
                hom_id = g.id
        else:
            hom_id = g.id

        # The name of the actual pdb file will be $GENEID_model1.pdb
        new_itasser_name = hom_id + '_model1'
        orig_itasser_dir = op.join(homology_raw_dir, hom_id)

        try:
            itasser_prop = g.protein.load_itasser_folder(ident=hom_id, itasser_folder=orig_itasser_dir,
                                                         organize=True, outdir=outdir,
                                                         organize_name=new_itasser_name,
                                                         force_rerun=force_rerun)
        except OSError:
            log.debug('{}: homology model folder unavailable'.format(g.id))
            continue
        except IOError:
            log.debug('{}: homology model unavailable'.format(g.id))
            continue

        if itasser_prop.structure_file:
            counter += 1
        else:
            log.debug('{}: homology model file unavailable, perhaps modelling did not finish'.format(g.id))

    log.info('Completed copying of {} I-TASSER models to GEM-PRO directory. '
             'See the "df_homology_models" attribute for a summary dataframe.'.format(counter))