def array2scaffolds(outbase, infile, infile2, fasta, threads, minWindows, scaffolds=[]):
"""Return scaffolds computed for given matrix"""
logger("Loading FastA...")
faidx = FastaIndex(fasta)
logger(" %s bp in %s contigs"%(faidx.genomeSize, len(faidx)))
logger("Loading matrix from %s ..."%infile)
d, bin_chr, bin_position, contig2size = load_matrix(infile, scaffolds=scaffolds, verbose=1)
logger(" matrix of %s windows for %s contigs summing %s bp"%(d.shape[0], len(contig2size), sum(contig2size.values())))
# make sure all contigs from matrix are present in FastA
diff = set(contig2size.keys()).difference(faidx)
if diff:
sys.stderr.write("[ERROR] %s / %s contigs are missing from provided FastA!\n"%(len(diff), len(contig2size)))
sys.exit(1)
logger("Calculating linkage matrix & tree...")
names = ["%s %7sk"%(get_name(c), s/1000) for c, (s, e) in zip(bin_chr, bin_position)]
d = transform(d)
t = array2tree(d, names, outbase)
del d
logger("Assigning contigs to clusters/scaffolds...")
clusters = get_clusters(outbase, t, contig2size, bin_chr)
logger("Constructing scaffolds...")
if threads > 1:
scaffolds = clusters2scaffolds_multi(clusters, infile2, minWindows, scaffolds, threads)
else:
scaffolds = clusters2scaffolds(clusters, infile2, minWindows, scaffolds)
logger("Reporting %s scaffolds..."%len(scaffolds))
fastafn = report_scaffolds(outbase, infile, scaffolds, faidx)
return scaffolds, fastafn
评论列表
文章目录