def realigner(out_dir, tmp_dir, winsize=50, unstranded=False):
"""DOCSTRING
Args:
Returns:
"""
# file handlers
mbam = pysam.Samfile(os.path.join(tmp_dir, 'multi.sorted.bam'),'rb')
ubam = pysam.Samfile(os.path.join(tmp_dir, 'unique.sorted.bam'),'rb')
obam = pysam.Samfile(os.path.join(out_dir, 'realigned.bam'), 'wb', template = mbam)
chr_list=[x['SN'] for x in ubam.header['SQ']]
# construct the mread_dict; this will be needed throughout
mread_dict = defaultdict(list)
for alignment in mbam:
mread_dict[alignment.qname].append(alignment)
# keep a record of processed reads
processed_mreads = set()
# iterate through all mreads
for read_qname in mread_dict:
if read_qname in processed_mreads:
continue
## construct the fully-connected subgraph for each read
read_to_locations, processed_mreads = \
construct_subgraph(mbam, read_qname, mread_dict, processed_mreads, chr_list, winsize=winsize, unstranded=unstranded)
subgraph = set()
for read in read_to_locations:
_ = map(subgraph.add, read_to_locations[read].keys())
subgraph = list(subgraph)
## build the BIT tracks
node_track, multi_reads_weights = \
construct_BIT_track(subgraph, read_to_locations, ubam, unstranded)
## run EM
multi_reads_weights = \
run_EM(node_track, multi_reads_weights, w=winsize)
## write to obam
for read in multi_reads_weights:
for node in multi_reads_weights[read]:
alignment = read_to_locations[read][node]
score = round(multi_reads_weights[read][node][0], 3)
alignment.set_tag('AS', score)
alignment.set_tag('PG', 'CLAM')
obam.write(alignment)
# sort the final output
logger.info('sorting output')
obam.close()
ubam.close()
mbam.close()
obam_sorted_fn = os.path.join(out_dir, 'realigned.sorted.bam')
pysam.sort('-o', obam_sorted_fn, os.path.join(out_dir, 'realigned.bam'))
pysam.index(obam_sorted_fn)
os.remove(os.path.join(out_dir, 'realigned.bam'))
return
评论列表
文章目录