def blast_seqs_to_pdb(self, seq_ident_cutoff=0, evalue=0.0001, all_genes=False, display_link=False,
                      outdir=None, force_rerun=False):
    """Run a PDB BLAST search for the representative sequence of every eligible gene.

    Walks ``self.genes_with_a_representative_sequence`` and delegates each search to
    ``gene.protein.blast_representative_sequence_to_pdb``, forwarding the options below
    unchanged. Raw BLAST results are saved as XML files.

    A gene that already has one or more experimental structures mapped is skipped unless
    ``all_genes`` or ``force_rerun`` is truthy.

    Args:
        seq_ident_cutoff (float, optional): Cutoff, in decimal form, used to filter BLAST hits.
            Forwarded as-is to the per-protein BLAST call. NOTE(review): the name suggests a
            sequence-identity threshold -- confirm against the per-protein method.
        evalue (float, optional): E-value cutoff used to keep only significant hits.
            0.001 is liberal, 0.0001 (the default) is stringent.
        all_genes (bool): True to BLAST every gene; False to BLAST only genes that have no
            experimental structure mapped yet.
        display_link (bool, optional): True if links to the HTML results should be displayed.
        outdir (str): Output directory for downloaded files. Must be given if the GEM-PRO
            directories were not created initially.
        force_rerun (bool, optional): True to ignore existing BLAST results. Note that it also
            disables the "already has structures" skip, just like ``all_genes``.

    Returns:
        None. Progress and the final tally are reported through the module logger.

    """
    # These options are identical for every gene, so assemble them once up front.
    blast_options = {
        'seq_ident_cutoff': seq_ident_cutoff,
        'evalue': evalue,
        'display_link': display_link,
        'outdir': outdir,
        'force_rerun': force_rerun,
    }
    # Either flag overrides the "already has experimental structures" skip below.
    blast_regardless = all_genes or force_rerun

    genes_with_new_hits = 0

    for gene in tqdm(self.genes_with_a_representative_sequence):
        protein = gene.protein
        num_experimental = protein.num_structures_experimental

        # Guard: nothing to do for genes that are already covered by experimental structures.
        if num_experimental > 0 and not blast_regardless:
            skip_message = ('{}: skipping BLAST, {} experimental structures already mapped '
                            'and all_genes flag is False')
            log.debug(skip_message.format(gene.id, num_experimental))
            continue

        new_pdbs = protein.blast_representative_sequence_to_pdb(**blast_options)

        # Guard: an empty/None result means BLAST contributed nothing for this gene.
        if not new_pdbs:
            log.debug('{}: no BLAST results'.format(gene.id))
            continue

        genes_with_new_hits += 1
        log.debug('{}: {} PDBs BLASTed'.format(gene.id, len(new_pdbs)))

    log.info('Completed sequence --> PDB BLAST. See the "df_pdb_blast" attribute for a summary dataframe.')
    log.info('{}: number of genes with additional structures added from BLAST'.format(genes_with_new_hits))
# NOTE: scraped web-page residue, not code: "评论列表" (comment list), "文章目录" (article table of contents)