def calc_AB(vcf):
    """Calculate the allele balance (AB) for every sample in a pdVCF.

    For each sample the DP and GQ columns are converted to numeric type
    (a cell holding only '.' becomes 0) and a new ``(sample, 'AB')``
    column is added, holding the second comma-separated AD value divided
    by DP, rounded to two decimals.

    Args:
        vcf: pdVCF (DataFrame with two-level columns: sample name on
            level 0, genotype field such as AD/DP/GQ on level 1) with
            genotype information extracted.

    Returns:
        The same ``vcf`` object, modified in place.

    Notes:
        ONLY WORKS FOR BIALLELIC VARIANTS: only the second AD entry is
        used as the alternate-allele depth.
        AB is NaN where AD is missing or where both the alt depth and DP
        are 0.
    """
    # Every level-0 label that owns an AD column is treated as a sample.
    # Reading the labels from the cross-section (instead of
    # columns.levels) ignores stale, unused MultiIndex levels.
    samples = list(vcf.xs('AD', level=1, axis=1).columns)

    for sam in samples:
        for field in ('DP', 'GQ'):
            # bcftools places '.' in empty fields. Series.replace matches
            # the WHOLE cell, so digits and decimal points inside real
            # values are never touched (str.replace would rewrite them),
            # and it is a no-op on columns that are already numeric.
            vcf[sam, field] = pd.to_numeric(vcf[sam, field].replace('.', '0'))

        # AD is "ref,alt[,...]"; a lone '.' has no second element and
        # yields NaN, which propagates into AB.
        alt_depth = pd.to_numeric(vcf[sam, 'AD'].str.split(',', n=2).str[1])
        balance = (alt_depth / vcf[sam, 'DP']).round(2)

        # Assign by position so a non-unique row index cannot trigger
        # label alignment.
        vcf[sam, 'AB'] = balance.to_numpy()

    return vcf
评论列表
文章目录