genomics.py 文件源码

python
阅读 41 收藏 0 点赞 0 评论 0

项目:genomics_general 作者: simonhmartin 项目源码 文件源码
def parseGenoFile(genoFile, names = None, includePositions = False, splitPhased=False, ploidy=None, headerLine = None):
    """Read every site from a geno file into a single GenoWindow.

    Parameters
    ----------
    genoFile : file-like object
        Read line by line with ``readline()`` until it returns ``''``.
    names : list of str, optional
        Sample names to keep, in the order they should appear in the window.
        Defaults to every sample named in the header.
    includePositions : bool
        When False, each site is added with ``ignorePosition=True``.
    splitPhased : bool
        Forwarded to ``parseGenoLine``. When True, every sample name is
        expanded into one name per haplotype (``name_A``, ``name_B``, ...).
    ploidy : list of int, optional
        Per-sample ploidy in header order; only consulted when
        ``splitPhased`` is True. Defaults to 2 for every sample.
    headerLine : str, optional
        Header text to use instead of reading it from ``genoFile``. When a
        non-empty value is given, no header line is consumed from the file,
        so the first line read from ``genoFile`` is treated as a site.

    Returns
    -------
    GenoWindow
        The window populated with one entry per line of the file.

    Raises
    ------
    ValueError
        If a requested name does not appear in the header.
    """
    #get file headers: prefer an explicitly supplied header, otherwise take the first line of the file
    if headerLine:
        headers = headerLine.split()
    else:
        headers = genoFile.readline().split()
    #the first two header fields are not sample names (presumably scaffold and position columns)
    allNames = headers[2:]
    if names is None: names = allNames
    if splitPhased:
        if ploidy is None: ploidy = [2]*len(allNames)
        ploidyDict = dict(zip(allNames, ploidy))
        #if splitting phased, we need to split names too
        allNames = [n + "_" + letter for n in allNames for letter in string.ascii_uppercase[:ploidyDict[n]]]
        names = [n + "_" + letter for n in names for letter in string.ascii_uppercase[:ploidyDict[n]]]
    #map each sample name to its first column, matching what list.index would return
    firstColumn = {}
    for column, name in enumerate(allNames):
        firstColumn.setdefault(name, column)
    missing = [name for name in names if name not in firstColumn]
    if missing:
        raise ValueError("Sample name(s) not found in header: " + ", ".join(map(str, missing)))
    #genotype columns to extract from each site, in the requested order
    columns = [firstColumn[name] for name in names]
    #initialise an empty window
    window = GenoWindow(names = names)
    #iter(readline, '') is kept so that reading stays consistent with the readline() call above
    for line in iter(genoFile.readline,''):
        site = parseGenoLine(line,splitPhased)
        window.addSite(GTs=[site.GTs[column] for column in columns], position=site.position, ignorePosition= not includePositions)

    return window


##########################################################################################################

#functions to make and parse alignment strings in fasta or phylip format
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号