def get_cds_sequence(rna,c_df,chrom_seq):
    """Translate the coding sequence of one transcript into a protein string.

    Rows of ``c_df`` whose ``rna_id`` equals ``rna`` are selected.  For each
    row the slice ``chrom_seq[start-1:end]`` is taken (columns 3 and 4 are
    therefore treated as 1-based inclusive coordinates), the slices are
    joined in transcript order, the leading ``frame`` bases are dropped and
    the remainder is translated.

    Args:
        rna: Transcript identifier compared against ``c_df['rna_id']``.
        c_df: pandas DataFrame with an ``rna_id`` column plus integer-labelled
            columns 3 (start), 4 (end), 6 (strand) and 7 (frame).
            NOTE(review): presumably a GFF/GTF table read without a header,
            restricted to CDS rows - confirm against the caller.
        chrom_seq: Sequence of the chromosome the transcript lies on.  Its
            slices must support ``reverse_complement()``.
            NOTE(review): presumably a ``Bio.Seq.Seq`` - confirm.

    Returns:
        The translated sequence as a ``str``.

    Raises:
        IndexError: If no row of ``c_df`` belongs to ``rna``.
        AssertionError: If the rows of ``rna`` carry more than one strand
            value.
    """
    pr_df = c_df[c_df['rna_id'].values == rna]
    if len(pr_df) == 0:
        # The original failed here with a bare IndexError from ``[0]``;
        # keep the exception type but say what went wrong.
        raise IndexError(rna + ' has no CDS rows')

    strands = set(pr_df[6].tolist())
    if len(strands) > 1:
        # Raised explicitly (not via ``assert``) so that the check is not
        # stripped when Python runs with -O.
        raise AssertionError(rna + ' has both strands')
    on_minus = strands == {'-'}

    # Put the segments in transcript order regardless of how the rows were
    # ordered in the input: ascending start on the plus strand, descending
    # start on the minus strand.  A stable sort keeps ties in input order.
    pr_df = pr_df.sort_values(by=3, ascending=not on_minus, kind='mergesort')

    # sequence merge
    chr_seq = Seq('')
    for start, end in zip(pr_df[3], pr_df[4]):
        segment = chrom_seq[start - 1:end]
        if on_minus:
            # Reverse-complementing each segment and joining them in
            # descending order equals reverse-complementing the whole
            # ascending concatenation.
            segment = segment.reverse_complement()
        chr_seq += segment

    # Frame (column 7) of the first segment in transcript order: number of
    # leading bases to skip before the first complete codon.
    frame = int(pr_df[7].tolist()[0])
    rna_seq = chr_seq[frame:]
    return str(rna_seq.translate())
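# Comment list (web-page navigation residue, not part of the code)
# Table of contents (web-page navigation residue, not part of the code)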