def getUniqueRedundSets(fileName,speciesName):
    '''Run through genbank file fileName, get set of unique genes (with no
    redundancies), and set with redundancies.

    Every CDS feature that carries a 'protein_id' qualifier contributes one
    gene name of the form speciesName + '-' + <first protein_id value>.

    Parameters:
        fileName: path of the genbank file to read.
        speciesName: string prefixed to every protein id.

    Returns:
        A tuple (uniqueS, redundS). uniqueS is the set of gene names seen
        exactly once in the file; redundS is the set of gene names seen
        more than once. The two sets are disjoint.
    '''
    # Imported locally so this function does not depend on the file's
    # top-level import block.
    from collections import Counter

    geneL = []
    # Plain 'r' rather than 'rU': the 'U' flag was removed in Python 3.11,
    # and text mode already applies universal newlines in Python 3. The
    # with-block closes the file even if parsing raises.
    with open(fileName, 'r') as f:
        for record in SeqIO.parse(f, "genbank"):
            # iterate through the genes on the chromosome
            for feature in record.features:
                # choose only the features that are protein coding genes
                if feature.type == "CDS" and 'protein_id' in feature.qualifiers:
                    geneName = speciesName + '-' + feature.qualifiers['protein_id'][0]
                    geneL.append(geneName)

    # Tally every name in a single pass; calling geneL.count() once per gene
    # would be quadratic in the number of genes.
    geneCounts = Counter(geneL)
    uniqueS = set()
    redundS = set()
    for gene, count in geneCounts.items():
        if count > 1:
            redundS.add(gene)
        else:
            uniqueS.add(gene)
    return uniqueS,redundS
评论列表
文章目录