def demultiplexFastqByBestMatch(aFASTQFile,aHandleList,aMismatch,isForward=True):
# we are exporting each handle into a different file
# this dictionary has the sequence handles as keys and the files as values
exportFiles = fileFactory(aHandleList)
sw = swFactory()
fh = open(aFASTQFile,'rU')
for idx, record in enumerate(SeqIO.parse(fh, "fastq")):
seqStr = str(record.seq)
# if we are looking for reverse sequence, get it from the end
if isForward:
seqStr = seqStr[0:100]
else:
#import pdb; pdb.set_trace()
seqStr = seqStr[len(seqStr)-99:]
# bestMatches is a list of handles having the same alignment score
# if there is only one, save it, else ignore ambiguities
bestMatches = getBestMatches(seqStr, aHandleList, sw, aMismatch) # get the best matches for all the handles
if len(bestMatches) == 1: # there is a single best match: store it
# unfortunately FASTQ export for very long reads looks to be buggy.
# So we have to export records ourselves
#SeqIO.write(record,exportFiles[bestMatches[0]],"fastq")
writeFASTQRecord(record,exportFiles[bestMatches[0]])
print "Wrote record " +str(idx)+" "+ record.id + " to " + (exportFiles[bestMatches[0]]).name
fh.close()
# be nice and close the exported files
for seq in aHandleList:
exportFiles[seq].close()
print "ready "
demultiplexON.py 文件源码
python
阅读 25
收藏 0
点赞 0
评论 0
评论列表
文章目录