def create_bam_outfile(file_name, chrom_names, chrom_lengths, template=None, pgs=None, cos=None, rgs=None, replace_rg=False):
    """Open a new BAM file for writing and return it with a name -> index map.

    If ``template`` (an existing, open pysam file) is given, its header is used
    as the starting point and ``chrom_names`` / ``chrom_lengths`` are ignored.
    Otherwise the header contains only 'SQ' entries built from ``chrom_names``
    and ``chrom_lengths`` (matched by position).

    The following arguments are only applied when ``template`` is given:

    pgs: list of dicts, each specifying a 'PG' entry. The ID field is required;
        PN/CL/PP/DS/VN fields are optional. Each entry is appended after the
        existing 'PG' entries, and its PP field is set to the ID of the entry
        that precedes it (when there is one). The caller's dicts are copied,
        not modified.
    cos: list of values, each appended to the header's 'CO' entries.
    rgs: list of dicts, each specifying an 'RG' entry, appended to the
        header's 'RG' entries.
    replace_rg: if True and ``rgs`` is non-empty, the existing 'RG' entries
        are discarded before ``rgs`` is added.

    Returns a tuple ``(bam_file, tids)`` where ``bam_file`` is the object
    returned by ``pysam.Samfile(file_name, 'wb', header=...)`` and ``tids``
    maps each reference name to its index.
    """
    if template:
        header = template.header
        # NOTE(review): newer pysam releases expose the header as an object
        # that cannot be edited in place; convert it to a plain dict when that
        # API is present. Confirm against the pysam version in use.
        if hasattr(header, 'to_dict'):
            header = header.to_dict()

        if pgs is not None:
            for pg in pgs:
                pg_entries = header.setdefault('PG', [])
                # Work on a copy so the PP field written below never leaks
                # back into the caller's dict (and from there into later calls).
                entry = pg.copy()
                # Chain the new entry to the PG entry currently last in the header.
                if pg_entries:
                    previous_id = pg_entries[-1].get('ID')
                    if previous_id is not None:
                        entry['PP'] = previous_id
                pg_entries.append(entry)

        if cos is not None:
            for co in cos:
                header.setdefault('CO', []).append(co)

        if rgs is not None:
            if replace_rg and 'RG' in header and len(rgs) > 0:
                header['RG'] = []
            for rg in rgs:
                header.setdefault('RG', []).append(rg)

        bam_file = pysam.Samfile(file_name, 'wb', header=header)
        tids = {name: n for (n, name) in enumerate(template.references)}
    else:
        header = {'SQ': [{'SN': name, 'LN': chrom_lengths[n]} for (n, name) in enumerate(chrom_names)]}
        bam_file = pysam.Samfile(file_name, 'wb', header=header)
        tids = {name: n for (n, name) in enumerate(chrom_names)}
    return bam_file, tids
评论列表
文章目录