def parse_coach_ec_df(infile):
    """Parse the EC.dat output file of COACH and return a dataframe of results.

    EC.dat contains the predicted EC number and active residues.
    The columns are: PDB_ID, TM-score, RMSD, Sequence identity,
    Coverage, Confidence score, EC number, and Active site residues.

    Args:
        infile (str): Path to EC.dat

    Returns:
        DataFrame: Pandas DataFrame summarizing EC number predictions. The
        template identifier is split into ``pdb_template_id`` (its first four
        characters) and ``pdb_template_chain`` (its fifth character, missing
        when the identifier is shorter than five characters). ``c_score`` is
        numeric; values that cannot be parsed become NaN.

    """
    ec_df = pd.read_table(
        infile,
        # Regex separator is the supported spelling of the deprecated
        # ``delim_whitespace=True`` option.
        sep=r'\s+',
        names=['pdb_template', 'tm_score', 'rmsd', 'seq_ident', 'seq_coverage',
               'c_score', 'ec_number', 'binding_residues'],
        # Keep the identifier as text: a value such as "1e100" would
        # otherwise be inferred as a float and could not be sliced.
        dtype={'pdb_template': str},
    )

    # Vectorised slicing yields a missing value instead of raising when an
    # identifier is too short to carry a chain character.
    ec_df['pdb_template_id'] = ec_df['pdb_template'].str[:4]
    ec_df['pdb_template_chain'] = ec_df['pdb_template'].str[4]

    # Convert before sub-selecting columns so the assignment is made on the
    # parsed frame itself rather than on a slice of it.
    ec_df['c_score'] = pd.to_numeric(ec_df['c_score'], errors='coerce')

    return ec_df[['pdb_template_id', 'pdb_template_chain', 'tm_score', 'rmsd',
                  'seq_ident', 'seq_coverage', 'c_score', 'ec_number',
                  'binding_residues']].copy()
评论列表
文章目录