demultiplexON.py 文件源码

python
阅读 25 收藏 0 点赞 0 评论 0

项目:icing 作者: NationalGenomicsInfrastructure 项目源码 文件源码
def demultiplexFastqByBestMatch(aFASTQFile,aHandleList,aMismatch,isForward=True):
    # we are exporting each handle into a different file
    # this dictionary has the sequence handles as keys and the files as values
    exportFiles = fileFactory(aHandleList)
    sw = swFactory()
    fh = open(aFASTQFile,'rU')
    for idx, record in enumerate(SeqIO.parse(fh, "fastq")):
        seqStr = str(record.seq)
        # if we are looking for reverse sequence, get it from the end
        if isForward:
            seqStr = seqStr[0:100]
        else:
            #import pdb; pdb.set_trace()
            seqStr = seqStr[len(seqStr)-99:]
        # bestMatches is a list of handles having the same alignment score
        # if there is only one, save it, else ignore ambiguities
        bestMatches = getBestMatches(seqStr, aHandleList, sw, aMismatch)   # get the best matches for all the handles
        if len(bestMatches) == 1:                                       # there is a single best match: store it
            # unfortunately FASTQ export for very long reads looks to be buggy.
            # So we have to export records ourselves
            #SeqIO.write(record,exportFiles[bestMatches[0]],"fastq")
            writeFASTQRecord(record,exportFiles[bestMatches[0]])
            print "Wrote record " +str(idx)+" "+ record.id + " to " + (exportFiles[bestMatches[0]]).name
    fh.close()
    # be nice and close the exported files
    for seq in aHandleList:
        exportFiles[seq].close()
    print "ready "
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号