snpmatch.py 文件源码

python
阅读 33 收藏 0 点赞 0 评论 0

项目:SNPmatch 作者: Gregor-Mendel-Institute 项目源码 文件源码
def readVcf(inFile, logDebug):
    log.info("reading the VCF file")
    ## We read only one sample from the VCF file
    if logDebug:
        vcf = allel.read_vcf(inFile, samples = [0], fields = '*')
    else:
        sys.stderr = StringIO.StringIO()
        vcf = allel.read_vcf(inFile, samples = [0], fields = '*')
        #vcf = vcfnp.variants(inFile, cache=False).view(np.recarray)
        #vcfD = vcfnp.calldata_2d(inFile, cache=False).view(np.recarray)
        sys.stderr = sys.__stderr__
    (snpCHR, snpsREQ) = parseChrName(vcf['variants/CHROM'])
    try:
        snpGT = allel.GenotypeArray(vcf['calldata/GT']).to_gt()[snpsREQ, 0]
    except AttributeError:
        die("input VCF file doesnt have required GT field")
    snpsREQ = snpsREQ[np.where(snpGT != './.')[0]]
    snpGT = allel.GenotypeArray(vcf['calldata/GT']).to_gt()[snpsREQ, 0]
    if 'calldata/PL' in sorted(vcf.keys()):
        snpWEI = np.copy(vcf['calldata/PL'][snpsREQ, 0]).astype('float')
        snpWEI = snpWEI/(-10)
        snpWEI = np.exp(snpWEI)
    else:
        snpBinary = parseGT(snpGT)
        snpWEI = np.ones((len(snpsREQ), 3))  ## for homo and het
        snpWEI[np.where(snpBinary != 0),0] = 0
        snpWEI[np.where(snpBinary != 1),2] = 0
        snpWEI[np.where(snpBinary != 2),1] = 0
    snpCHR = snpCHR[snpsREQ]
    DPmean = np.mean(vcf['calldata/DP'][snpsREQ,0])
    snpPOS = np.array(vcf['variants/POS'][snpsREQ])
    return (DPmean, snpCHR, snpPOS, snpGT, snpWEI)
评论列表


问题


面经


文章

微信
公众号

扫码关注公众号