def _parse_gff(file, file_name):
    """Parse a GFF file into a list of renamed ``SeqRecord`` objects.

    Every record yielded by ``GFF.parse`` is rebuilt as a new ``SeqRecord``
    whose id is ``<original id>_<n>``, where ``n`` is the record's 1-based
    position in the file. When that id is longer than 15 characters it is
    replaced by ``contig__<n>``.

    Args:
        file: Path of the GFF file to open for reading.
        file_name: Path or name whose basename is stored as the description
            of every returned record.

    Returns:
        A list of ``SeqRecord`` objects in file order, each carrying the
        features of the parsed record it was built from.
    """
    parsed = []
    with open(file, "r") as handle:
        # Numbering runs over all contigs in the file (chromosome, plasmid
        # or unidentified alike); it is not restarted per contig type.
        for position, entry in enumerate(GFF.parse(handle), start=1):
            suffix = "_" + str(position)
            new_id = entry.id + suffix
            if len(new_id) > 15:
                # NOTE(review): this yields "contig__<n>" with a double
                # underscore; kept as-is because callers may rely on the
                # exact id -- confirm whether a single "_" was intended.
                new_id = "contig_" + suffix
            sequence = Seq(str(entry.seq), IUPAC.unambiguous_dna)
            parsed.append(
                SeqRecord(
                    sequence,
                    id=new_id,
                    description=os.path.basename(file_name),
                    features=entry.features,
                )
            )
    return parsed
评论列表
文章目录