def _strip_pdb_affixes(path):
    """Return *path* as a string without one leading './' and one trailing '.pdb'."""
    name = str(path)
    # NOTE: str.strip('./') / str.strip('.pdb') would strip *character sets*
    # from both ends (e.g. '1abd.pdb' -> '1a'), so the affixes are removed
    # explicitly instead.
    if name.startswith('./'):
        name = name[2:]
    if name.endswith('.pdb'):
        name = name[:-4]
    return name


def parse_psqs(psqs_results_file):
    """Parse a PSQS result file and return a Pandas DataFrame of the results.

    The file is read as header-less, tab-separated text. Column 0 is taken as
    the structure file name; columns 1-4 are renamed to ``psqs_local``,
    ``psqs_burial``, ``psqs_contact`` and ``psqs_total``.

    Args:
        psqs_results_file: Path to psqs results file (anything accepted by
            ``pandas.read_csv``).

    Returns:
        Pandas DataFrame: Summary of PSQS results with the columns
            ``psqs_local``, ``psqs_burial``, ``psqs_contact``, ``psqs_total``,
            ``pdb_file`` (file name without a leading ``./`` and trailing
            ``.pdb``), ``u_pdb`` (upper-cased ``pdb_file`` when it is exactly
            four characters long, otherwise NaN) and ``i_entry_name`` (the
            part of ``pdb_file`` before ``_model1`` when it is longer than
            four characters, otherwise NaN). Rows whose ``psqs_total`` is
            null are dropped.
    """
    # TODO: generalize column names for all results, save as dict instead
    psqs_results = pd.read_csv(psqs_results_file, sep='\t', header=None)

    psqs_results['pdb_file'] = psqs_results[0].apply(_strip_pdb_affixes)
    psqs_results = psqs_results.rename(
        columns={1: 'psqs_local', 2: 'psqs_burial', 3: 'psqs_contact', 4: 'psqs_total'}
    ).drop(0, axis=1)

    # Four-character names are treated as PDB IDs; longer names as model files
    # named '<entry>_model1'.
    psqs_results['u_pdb'] = psqs_results['pdb_file'].apply(
        lambda x: x.upper() if len(x) == 4 else np.nan
    )
    psqs_results['i_entry_name'] = psqs_results['pdb_file'].apply(
        lambda x: x.split('_model1')[0] if len(x) > 4 else np.nan
    )

    # Keep only rows that actually carry a total score.
    psqs_results = psqs_results[pd.notnull(psqs_results.psqs_total)]
    return psqs_results
评论列表
文章目录