# genomics.py source file - Python code snippet

def parseGenoFile(genoFile, names = None, includePositions = False, splitPhased=False, ploidy=None, headerLine = None):
    """Read every site of a geno file into a single GenoWindow.

    The first line of genoFile is read as the header. Its fields from the
    third onwards are taken as the sample names (the first two fields are
    presumably the scaffold and position columns -- confirm against
    parseGenoLine). Each remaining line is parsed with parseGenoLine and the
    genotypes of the requested samples are added to the window.

    Parameters:
        genoFile: open file-like object providing readline(); it is consumed
            until readline() returns ''.
        names: sample names to keep, in the order wanted in the window.
            Defaults to all samples named in the header.
        includePositions: when False, sites are added with ignorePosition=True.
        splitPhased: passed on to parseGenoLine. When true, every sample name
            is expanded to one name per haplotype ("name_A", "name_B", ...).
        ploidy: only used when splitPhased is true. One value per sample in
            the header, in header order; defaults to 2 for every sample.
        headerLine: accepted but not used by this function.

    Returns:
        The GenoWindow holding the requested samples for every site read.

    Raises:
        ValueError: if a requested name is not present in the header.
        KeyError: if splitPhased is true and a name has no ploidy entry
            (name absent from the header, or ploidy shorter than the header).
    """
    #get file headers
    headers = genoFile.readline().split()
    allNames = headers[2:]
    if names is None: names = allNames
    if splitPhased:
        if ploidy is None: ploidy = [2]*len(allNames)
        ploidyDict = dict(zip(allNames, ploidy))
        #if splitting phased, we need to split names too
        allNames = [n + "_" + letter for n in allNames for letter in string.ascii_uppercase[:ploidyDict[n]]]
        names = [n + "_" + letter for n in names for letter in string.ascii_uppercase[:ploidyDict[n]]]
    #map each column name to its index in one pass (first occurrence wins, as list.index would)
    columnOf = {}
    for column, name in enumerate(allNames):
        columnOf.setdefault(name, column)
    missing = [name for name in names if name not in columnOf]
    if missing:
        raise ValueError("sample name(s) not found in geno file header: " + ", ".join(str(name) for name in missing))
    #file columns of the requested samples, worked out once instead of once per line
    columns = [columnOf[name] for name in names]
    #initialise an empty window
    window = GenoWindow(names = names)
    ignorePosition = not includePositions
    for line in iter(genoFile.readline,''):
        site = parseGenoLine(line,splitPhased)
        window.addSite(GTs=[site.GTs[column] for column in columns], position=site.position, ignorePosition=ignorePosition)

    return window


##########################################################################################################

#functions to make and parse alignment strings in fasta or phylip format