def getAabrhRawScoreSummmaryD(strainNamesL,aabrhL,scoresO,geneNames):
    '''Summarize raw scores for every pair of strains.

    For each unordered pair of strains in strainNamesL, collect scores
    from the already-computed ortholog sets in aabrhL and reduce them
    to a (mean, standard deviation) tuple.

    Parameters:
        strainNamesL: sequence of strain names; every pair formed from
            two different positions gets an entry.
        aabrhL: iterable of ortholog tuples. Each one is passed to
            addPairwiseScores (presumably these are the all-around best
            reciprocal hit sets -- confirm against the caller).
        scoresO: passed through unchanged to addPairwiseScores.
        geneNames: passed through unchanged to addPairwiseScores.

    Returns:
        dict mapping (strain1, strain2) to (mean, std). Each pair is
        stored under both key orders so callers can look it up either
        way. Empty when fewer than two strain names are given.

    Raises:
        statistics.StatisticsError: if a pair ends up with no scores
            (mean) or with fewer than two scores (stdev).
    '''
    # One empty score list per pair, keyed in strainNamesL order. This
    # is the upper triangle of the strain-by-strain matrix.
    spScoreD = {}
    for i, strain1 in enumerate(strainNamesL):
        for strain2 in strainNamesL[i+1:]:
            spScoreD[(strain1, strain2)] = []

    # addPairwiseScores returns the dictionary to keep using; it is
    # presumably the same object with scores appended, but rebinding
    # keeps this correct either way.
    for orthoT in aabrhL:
        spScoreD = addPairwiseScores(spScoreD, orthoT, scoresO, geneNames)

    # Reduce each score list to (mean, sample standard deviation) and
    # mirror it so the result is symmetric in the strain pair.
    summaryD = {}
    for (sp1, sp2), pairScoresL in spScoreD.items():
        mean = statistics.mean(pairScoresL)
        std = statistics.stdev(pairScoresL)
        summaryD[(sp1, sp2)] = (mean, std)
        summaryD[(sp2, sp1)] = (mean, std)
    return summaryD
评论列表
文章目录