def vfam_to_krona(self, vfam_file):
    """Write a weighted count table to ``self.krona_in`` from a vfam table.

    The input is read as a tab-separated text file whose first line is a
    header. For every following non-blank row:

    * column 1 (0-based) is the vfam identifier,
    * column 3 is a Python dict literal mapping family name -> weight,
    * column 4 is a Python dict literal mapping genus name -> weight.

    The number of rows naming a vfam is split across every
    (family, genus) pair in proportion to the two weight dicts, and one
    line ``<count>\\t<family>\\t<vfam>\\t<genus>`` is written per pair, with
    the count formatted to three decimals. When a vfam appears on several
    rows, the dicts from its last row are the ones used.

    NOTE(review): the output is presumably meant for Krona's text
    importer, judging by the attribute name -- confirm with the caller.

    Args:
        vfam_file: Path of the tab-separated input file.

    Raises:
        IndexError: If a non-blank row has fewer than five columns.
        ValueError, SyntaxError: If column 3 or 4 is not a valid literal.
    """
    vfam_dic = defaultdict(int)
    families_dic = {}
    genera_dic = {}
    # Use a separate name for the handle so the path parameter is not shadowed.
    with open(vfam_file, 'r') as handle:
        handle.readline()  # get rid of the header
        for line in handle:
            if not line.strip():
                # A blank line (typically a trailing newline at the end of
                # the file) carries no record and would otherwise raise
                # IndexError below.
                continue
            fields = line.rstrip('\r\n').split('\t')
            vfam = fields[1]
            vfam_dic[vfam] += 1
            # literal_eval only evaluates Python literals, never arbitrary
            # expressions, so it is safe on file content.
            families_dic[vfam] = ast.literal_eval(fields[3])
            genera_dic[vfam] = ast.literal_eval(fields[4])

    with open(self.krona_in, 'w') as out:
        for vfam, n_reads in vfam_dic.items():
            families = families_dic[vfam]
            genera = genera_dic[vfam]
            # Both totals are constant for a given vfam; compute them once.
            fam_total = sum(families.values())
            gen_total = sum(genera.values())
            if not fam_total or not gen_total:
                # No proportional split is defined when the weights sum to
                # zero; skip instead of dying with ZeroDivisionError and
                # leaving a half-written output file. Empty dicts land here
                # too and produce no rows, as before.
                continue
            for fam, fam_prop in families.items():
                fam_reads = n_reads * (fam_prop / fam_total)
                for genus, gen_prop in genera.items():
                    n = fam_reads * (gen_prop / gen_total)
                    out.write('%.3f\t%s\t%s\t%s\n' % (n, fam, vfam, genus))
评论列表
文章目录