def process_contig_chunk( args ):
chunk_id = args[0]
control_pkl = args[1]
cut_CMDs = args[2]
kmers = args[3]
cols_chunk = args[4]
contig_id = args[5]
n_chunks = args[6]
n_contigs = args[7]
opts = args[8]
logging.info(" - Contig %s/%s: chunk %s/%s" % ((contig_id+1), n_contigs, (chunk_id+1), (n_chunks+1)))
control_means = pickle.load(open(control_pkl, "rb"))
contig_motifs = {}
case_motif_Ns = {}
for cut_CMD in cut_CMDs:
sts,stdOutErr = mbin.run_OS_command( cut_CMD )
fns = map(lambda x: x.split("> ")[-1], cut_CMDs)
contig_ipds_sub = np.loadtxt(fns[0], dtype="float")
contig_ipds_N_sub = np.loadtxt(fns[1], dtype="int")
# If there is only one row (read) for this contig, still treat as
# a 2d matrix of many reads
contig_ipds_sub = np.atleast_2d(contig_ipds_sub)
contig_ipds_N_sub = np.atleast_2d(contig_ipds_N_sub)
for j in range(len(cols_chunk)):
motif = kmers[cols_chunk[j]]
case_contig_Ns = contig_ipds_N_sub[:,j]
if control_means.get(motif):
case_contig_means = contig_ipds_sub[:,j]
if np.sum(case_contig_Ns)>0:
case_mean = np.dot(case_contig_means, case_contig_Ns) / np.sum(case_contig_Ns)
else:
case_mean = 0
score = case_mean - control_means[motif]
contig_motifs[motif] = score
case_motif_Ns[motif] = np.sum(case_contig_Ns)
return contig_motifs,case_motif_Ns
评论列表
文章目录