def run_fatcat_all_by_all(list_of_structure_paths, fatcat_sh, outdir='', silent=True, force_rerun=False):
    """Run FATCAT on all pairs of structures given a list of structures.

    Every unordered pair of structures is aligned once with ``run_fatcat`` and the
    resulting output file is parsed once with ``parse_fatcat``; the ``'tm_score'``
    entry is written symmetrically into the score matrix. The diagonal is
    initialised to 1.0 (identity matrix) and is never overwritten.

    Args:
        list_of_structure_paths (list): List of PDB file paths
        fatcat_sh (str): Path to "runFATCAT.sh" executable script
        outdir (str): Path to where FATCAT XML output files will be saved
        silent (bool): Forwarded unchanged to ``run_fatcat``.
            NOTE(review): the name suggests True suppresses printing of the
            FATCAT command to stdout -- confirm against ``run_fatcat``.
        force_rerun (bool): If FATCAT should be run even if XML output files already exist

    Returns:
        Pandas DataFrame: TM-scores (similarity) between all structures. Rows and
        columns are labelled with each path's base filename without its extension,
        in the same order as ``list_of_structure_paths``.

    """
    paths = list(list_of_structure_paths)
    num_structures = len(paths)

    # Identity matrix: each structure's score against itself is left at 1.0.
    tm_score_matrix = np.eye(num_structures)

    # Iterate over index pairs rather than path pairs so that a path appearing
    # more than once in the input still gets its own row/column filled in
    # (a path -> index dict would collapse duplicates onto a single index).
    index_pairs = itertools.combinations(range(num_structures), 2)
    num_pairs = num_structures * (num_structures - 1) // 2

    # combinations() has no len(), so hand tqdm the pair count explicitly.
    for i, j in tqdm(index_pairs, total=num_pairs):
        fatcat_file = run_fatcat(paths[i], paths[j], fatcat_sh,
                                 outdir=outdir, silent=silent, force_rerun=force_rerun)
        # Parse the output once and mirror the score across the diagonal.
        tm_score = parse_fatcat(fatcat_file)['tm_score']
        tm_score_matrix[i, j] = tm_score
        tm_score_matrix[j, i] = tm_score

    # Convert to dataframe with filenames
    filenames = [op.splitext(op.basename(x))[0] for x in paths]
    return pd.DataFrame(data=tm_score_matrix, columns=filenames, index=filenames)
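# Comment list (page-navigation text left over from the source web page; not code)
# Article table of contents (page-navigation text left over from the source web page; not code)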