def merge_by_key(bam_filenames, key_func, bam_out):
file_cache = tk_cache.FileHandleCache(mode='rb', open_func=pysam.Samfile)
total_reads = 0
heap = []
for bam_filename in bam_filenames:
try:
bam = file_cache.get(bam_filename)
first_read = bam.next()
heapq.heappush(heap, (key_func(first_read), first_read, bam_filename))
except StopIteration:
pass
while len(heap) > 0:
# Get the minimum item and write it to the bam.
key, read, bam_filename = heapq.heappop(heap)
bam = file_cache.get(bam_filename)
bam_out.write(read)
total_reads += 1
# Get the next read from the source bam we just wrote from
# If that BAM file is out of reads, then we leave that one out
try:
next_read = bam.next()
heapq.heappush(heap, (key_func(next_read), next_read, bam_filename))
except StopIteration:
pass
return total_reads
评论列表
文章目录