def get_hits(filename, criteria='cum_BLAST_score'):
    """
    Return the best-ranked hit row for every query gene in a results file.

    Reproduces Tiago's original code (table_1_extender.py). In the future
    other ranking criteria may be supported; by default the hit comes from
    the block with the highest cumulative BLAST score.

    The file is read whole and handed to ``antiSMASH_to_dataFrame`` (defined
    elsewhere; assumed to return a pandas DataFrame that has a
    ``query_gene`` column and the column(s) named by ``criteria`` -- TODO
    confirm against that parser). Rows without a ``query_gene`` are dropped,
    the remainder is ranked by ``criteria`` from highest to lowest (missing
    scores last), and the top-ranked row of each query gene is kept.

    Parameters
    ----------
    filename
        Path of the file to read.
    criteria
        Column label passed to ``sort_values(by=...)`` to rank the hits.

    Returns
    -------
    DataFrame
        One row per distinct ``query_gene``, ordered by ``query_gene``, with
        ``query_gene`` as the first column and a fresh 0..n-1 index.
    """
    with open(filename) as f:
        df = antiSMASH_to_dataFrame(f.read())

    hits = df.dropna(subset=['query_gene'])

    # A stable sort keeps rows with equal scores in file order, so that on a
    # tie the earliest block wins, as the selection rule above requires.
    hits = hits.sort_values(by=criteria, ascending=False,
                            na_position='last', kind='mergesort')

    # Keep the whole top-ranked row per gene. GroupBy.first() is not used
    # because it picks the first *non-null* value of each column separately,
    # which can stitch together cells coming from different hits.
    best = hits.drop_duplicates(subset=['query_gene'], keep='first')

    # Reproduce the layout groupby(..., as_index=False) used to give:
    # sorted by the key, key column first, index renumbered from zero.
    best = best.sort_values(by='query_gene', kind='mergesort')
    ordered = ['query_gene'] + [c for c in best.columns if c != 'query_gene']
    return best[ordered].reset_index(drop=True)
评论列表
文章目录