def parseGenoFile(genoFile, names = None, includePositions = False, splitPhased=False, ploidy=None, headerLine = None):
    """Read all remaining site lines of a geno file into one GenoWindow.

    The header is split on whitespace; its first two fields are skipped and
    the remaining fields are taken as the sample names, in column order.

    Arguments:
    genoFile         -- open file-like object; only readline() is used.
    names            -- sample names to keep, in the order they should appear
                        in the window. Defaults to every sample in the header.
    includePositions -- its negation is passed to addSite as ignorePosition.
    splitPhased      -- if true, every sample name is expanded into one
                        suffixed name per ploidy unit (name_A, name_B, ...)
                        and the flag is forwarded to parseGenoLine.
    ploidy           -- one value per sample in the header, only consulted
                        when splitPhased is true. Defaults to 2 for each.
    headerLine       -- header text the caller has already read. When given,
                        no header is read from genoFile, which must then be
                        positioned at the first site line. When None, the
                        header is read from genoFile.

    Returns the populated GenoWindow.

    A requested name that is missing from the header raises KeyError when
    splitPhased is true and ValueError otherwise.
    """
    #get file headers (from the caller if already consumed, otherwise from the file)
    if headerLine is None:
        headerLine = genoFile.readline()
    headers = headerLine.split()
    allNames = headers[2:]
    if names is None:
        names = allNames
    if splitPhased:
        if ploidy is None:
            ploidy = [2] * len(allNames)
        ploidyDict = dict(zip(allNames, ploidy))
        #if splitting phased, we need to split names too
        allNames = [n + "_" + letter for n in allNames for letter in string.ascii_uppercase[:ploidyDict[n]]]
        names = [n + "_" + letter for n in names for letter in string.ascii_uppercase[:ploidyDict[n]]]
    #file column of each requested sample, resolved once rather than per site
    columns = [allNames.index(name) for name in names]
    #initialise an empty window
    window = GenoWindow(names = names)
    #iter() with a sentinel stops at the empty string readline() returns at EOF
    for line in iter(genoFile.readline, ''):
        site = parseGenoLine(line, splitPhased)
        window.addSite(GTs=[site.GTs[col] for col in columns], position=site.position, ignorePosition= not includePositions)
    return window
##########################################################################################################
#functions to make and parse alignment strings in fasta or phylip format
# Comment list
# Table of contents