Genome_Annotation.py 文件源码

python
阅读 21 收藏 0 点赞 0 评论 0

项目:NGS-Pipeline 作者: LewisLabUCSD 项目源码 文件源码
def add_gene_function(blast_db,evm_path):
    '''add gene symbol to gff file. the information is from the blast results
    '''
    blastp_fn = blast_db + '/blastp.txt'
    blast_df = pd.read_csv(blastp_fn,sep='\t',usecols=[0,1,2],names=['ref','query','per'])
    blast_df = blast_df[blast_df['per'].values>50]
    blast_df['rna'] = blast_df['ref'].map(lambda x: '.'.join(x.split('.')[-2:]))
    blast_df['pr'] = blast_df['query'].map(lambda x: x.split('|')[-1].split('_')[0])
    rna_pr_dic = blast_df.set_index('rna')['pr'].to_dict()

    evm_gff= evm_path + '/evm.merge.gff'
    gff_df = pd.read_csv(evm_gff,sep='\t',header=None)
    gff_df[8] = gff_df[8].map(lambda x: add_gene_name(x,rna_pr_dic))
    gff_df = gff_df[~gff_df[8].map(lambda x: 'gene=LORF2' in x)]
    gff_df.to_csv(blast_db +'/final.gff',sep='\t',index=False)

# add_gene_function(blast_db,evm_path)
#===============================================================================
#                     process the gmap results and exonerates results directly
#===============================================================================
#=============== 1. get all mapped geneid, rna_accession, pr_accession
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号