def chunk_control_matrices( self, control_ipds_fn, control_ipds_N_fn, control_kmers_fn ):
    """
    Split the two control matrices column-wise and process the pieces in a
    worker pool.

    The k-mer labels are loaded from ``control_kmers_fn``; their positions
    are partitioned into contiguous chunks. For every chunk, one ``cut``
    command per matrix file is built that extracts the matching 1-based
    column range into ``<matrix_fn>.sub.<chunk index>``. The commands are
    handed to ``process_contig_chunk`` through ``mbin.launch_pool`` using
    ``self.opts.procs``, and the per-chunk results are merged.

    Parameters
    ----------
    control_ipds_fn : str
        Path of the first matrix file to slice by column.
    control_ipds_N_fn : str
        Path of the second matrix file to slice by column.
    control_kmers_fn : str
        Path of the text file holding the k-mer labels. Presumably one
        label per matrix column, in column order -- confirm against the
        code that writes these files.

    Returns
    -------
    tuple
        ``(control_means, not_found)``: ``control_means`` is the union of
        the mapping returned as element 0 of every chunk result (on a
        duplicate key the later chunk wins); ``not_found`` is the sum of
        element 1 of every chunk result.
    """
    kmers = np.atleast_1d(np.loadtxt(control_kmers_fn, dtype="str"))
    fns = [control_ipds_fn, control_ipds_N_fn]
    n_chunks = 99

    # Convert to float BEFORE dividing so the ceiling sees the true quotient
    # (with integer division the ceil was a no-op), and never let the chunk
    # size drop to 0, which is not a valid step for splitting the columns.
    # The number of chunks actually produced can be smaller than n_chunks;
    # n_chunks itself is still forwarded to the workers unchanged.
    chunksize = max(1, int(math.ceil(float(len(kmers)) / n_chunks)))
    cols_chunks = list(chunks( range(len(kmers)), chunksize ))

    args = []
    for i,cols_chunk in enumerate(cols_chunks):
        # cut uses 1-based, inclusive field ranges; the range is the same
        # for both matrix files, so build it once per chunk.
        cut_cols = "%s-%s" % ((cols_chunk[0]+1), (cols_chunk[-1]+1))
        cut_CMDs = []
        for fn in fns:
            out_fn = fn+".sub.%s" % i
            # NOTE(review): file names are interpolated unquoted; paths with
            # spaces or shell metacharacters would break the command if it
            # is run through a shell -- confirm how the worker executes it.
            cut_CMD = "cut -d$\'\\t\' -f%s %s > %s" % (cut_cols, fn, out_fn)
            cut_CMDs.append(cut_CMD)
        args.append( (i, cut_CMDs, kmers, cols_chunk, n_chunks, self.opts.min_motif_count) )

    results = mbin.launch_pool(self.opts.procs, process_contig_chunk, args)

    logging.info("Combining motifs from all chunks of control data...")
    not_found = 0
    control_means = {}
    for result in results:
        not_found += result[1]
        control_means.update(result[0])
    logging.info("Done.")

    return control_means,not_found
评论列表
文章目录