def getSeqs(sequences):
    '''
    Collapse every record of a FASTA file so that its sequence spans a single line.

    The file is read line by line. A line starting with '>' begins a new record and is kept verbatim (including its line ending). Every other line has its trailing whitespace stripped and is appended to the record currently being built, so a sequence wrapped over several lines becomes one line.

    :param sequences: path to the FASTA file (e.g. the 16S rDNA sequences of the OTUs under study) whose sequences may span multiple lines.
    :returns seqs: list of strings. Element 0 holds whatever text preceded the first '>' header (normally the empty string); each following element is one record: the header line followed by the sequence joined onto a single line, with no trailing newline after the sequence.
    '''
    cherrypy.log("We are now going to clean up the FASTA file given (%s) so that each DNA sequence does not span multiple lines."%sequences)
    # Start with one (usually empty) leading element so the result keeps the
    # shape callers already rely on: index 0 is the text before the first header.
    seqs = ['']
    # The context manager guarantees the handle is closed even if reading fails.
    with open(sequences,'r') as userSeqs:
        for line in userSeqs:
            if line.startswith('>'):
                # A header opens a new record; records are collected directly
                # rather than joined with an in-band marker and split again, so
                # a header containing the marker text cannot be broken apart.
                seqs.append(line)
            else:
                # Sequence (or pre-header) text: drop the line break and glue it
                # onto the record currently being built.
                seqs[-1] += line.rstrip()
    cherrypy.log("Clean up is finished. We are ready to fetch only the representative OTU sequences that we are interested in using in the rest of our analysis." )
    return seqs
评论列表
文章目录