def get_fasta_seq_dictonary(fa_file, bin_width=10):
    """Index FASTA records by ID with their length and binned GC content.

    Args:
        fa_file: FASTA source, passed unchanged to ``SeqIO.parse``.
        bin_width: Width of the GC bins. Each record's GC value is floored
            to a multiple of this width. Defaults to 10, which was the
            previously hard-coded width.

    Returns:
        dict mapping each record ID to a three-item list
        ``[sequence, sequence_length, gc_bin]``. When two records share an
        ID, the later record replaces the earlier one.

    Raises:
        ValueError: If ``bin_width`` is not positive.
    """
    if bin_width <= 0:
        raise ValueError(
            "bin_width must be positive, got {!r}".format(bin_width)
        )

    # NOTE(review): GC is presumably Bio.SeqUtils.GC, returning a percentage
    # in the range 0-100 -- confirm against the file's imports. Newer
    # Biopython releases replace it with gc_fraction (a 0-1 fraction), which
    # would need rescaling before binning.
    records_by_id = {}
    for record in SeqIO.parse(fa_file, "fasta"):
        sequence = record.seq
        gc_value = GC(sequence)
        # Floor to the lower edge of the bin, e.g. 47.3 -> 40 for width 10.
        gc_bin = math.floor(gc_value / bin_width) * bin_width
        records_by_id[record.id] = [sequence, len(sequence), gc_bin]
    return records_by_id
评论列表
文章目录