fatcat.py 文件源码-python代码片段

def run_fatcat_all_by_all(list_of_structure_paths, fatcat_sh, outdir='', silent=True, force_rerun=False):
    """Run FATCAT on all pairs of structures given a list of structures.

    Each unordered pair of structures is aligned once with ``run_fatcat``; the
    ``'tm_score'`` entry of the parsed output is stored in both symmetric cells
    of the result matrix. Diagonal cells are initialised to 1.0 and are not
    computed by FATCAT.

    Args:
        list_of_structure_paths (list): List of PDB file paths
        fatcat_sh (str): Path to "runFATCAT.sh" executable script
        outdir (str): Path to where FATCAT XML output files will be saved
        silent (bool): Forwarded unchanged to ``run_fatcat``. Presumably True
            suppresses printing of the FATCAT command to stdout -- confirm
            against ``run_fatcat``.
        force_rerun (bool): If FATCAT should be run even if XML output files already exist

    Returns:
        Pandas DataFrame: Symmetric matrix of TM-scores (similarity) between all
        structures. Rows and columns are labelled with each structure's file
        name without directory or extension, in input order.

    """
    # Materialise once so positions are stable and indexable.
    structure_paths = list(list_of_structure_paths)
    num_structures = len(structure_paths)
    tm_score_matrix = np.eye(num_structures)

    # Pair positions rather than paths: a path that appears more than once in
    # the input still gets its own row/column instead of collapsing onto a
    # single dictionary key.
    index_pairs = itertools.combinations(range(num_structures), 2)
    # combinations() has no len(); give tqdm the pair count so it can show a total.
    num_pairs = num_structures * (num_structures - 1) // 2

    for i, j in tqdm(index_pairs, total=num_pairs):
        fatcat_file = run_fatcat(structure_paths[i], structure_paths[j], fatcat_sh,
                                 outdir=outdir, silent=silent, force_rerun=force_rerun)
        # Parse the output once and mirror the score across the diagonal.
        tm_score = parse_fatcat(fatcat_file)['tm_score']
        tm_score_matrix[i, j] = tm_score
        tm_score_matrix[j, i] = tm_score

    # Convert to dataframe with filenames
    filenames = [op.splitext(op.basename(x))[0] for x in structure_paths]
    tm_score_matrix_annotated = pd.DataFrame(data=tm_score_matrix, columns=filenames, index=filenames)

    return tm_score_matrix_annotated