def process_contig_chunk( args ):
chunk_id = args[0]
cut_CMDs = args[1]
kmers = args[2]
cols_chunk = args[3]
n_chunks = args[4]
min_motif_count = args[5]
logging.info(" - Control data: chunk %s/%s" % ((chunk_id+1), (n_chunks+1)))
control_means = {}
for cut_CMD in cut_CMDs:
sts,stdOutErr = mbin.run_OS_command( cut_CMD )
fns = map(lambda x: x.split("> ")[-1], cut_CMDs)
control_ipds_sub = np.loadtxt(fns[0], dtype="float")
control_ipds_N_sub = np.loadtxt(fns[1], dtype="int")
# If there is only one row (read) for this contig, still treat as
# a 2d matrix of many reads
control_ipds_sub = np.atleast_2d(control_ipds_sub)
control_ipds_N_sub = np.atleast_2d(control_ipds_N_sub)
not_found = 0
for j in range(len(cols_chunk)):
motif = kmers[cols_chunk[j]]
if np.sum(control_ipds_N_sub[:,j])>=min_motif_count:
if np.sum(control_ipds_N_sub[:,j])>0:
control_mean = np.dot(control_ipds_sub[:,j], control_ipds_N_sub[:,j]) / np.sum(control_ipds_N_sub[:,j])
else:
control_mean = 0
control_means[motif] = control_mean
else:
not_found += 1
return control_means,not_found
评论列表
文章目录