def kmer_freq(ref_str, k):
    """
    Count strand-collapsed k-mers in a sequence, with a pseudocount of 1.

    The sequence is upper-cased and scanned with a sliding window of
    length ``k``. Every k-mer over the alphabet A/T/G/C is then paired
    with the motif returned by ``rev_comp_motif``; the first member of
    each pair (in ``product("ATGC", repeat=k)`` order) becomes the key and
    its value is the window count of the k-mer, plus the window count of
    its partner, plus 1.

    Windows containing characters outside A/T/G/C are tallied internally
    but never appear in the result, because only enumerated k-mers are
    used as keys.

    NOTE(review): when ``rev_comp_motif(kmer) == kmer`` the same window
    count is added twice -- presumably intended as counting both strands;
    confirm against callers.

    Returns a ``Counter`` mapping k-mer -> combined count + 1.
    """
    sequence = ref_str.upper()

    # Enumerate every possible k-mer up front, in product() order.
    all_kmers = ["".join(letters) for letters in product("ATGC", repeat=k)]

    # Tally each length-k window of the sequence.
    window_counts = Counter(
        sequence[start:start + k] for start in range(len(sequence) - (k - 1))
    )

    # Fold each k-mer and its reverse-complement partner into one entry.
    merged = Counter()
    for forward in all_kmers:
        partner = rev_comp_motif(forward)
        # Stored values are always >= 1, so a truthy lookup means the
        # partner was already recorded and this pair is done.
        if merged.get(partner):
            continue
        merged[forward] = window_counts[forward] + window_counts[partner] + 1
    return merged
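# Comment list (web-page navigation text, not part of the code)
# Article table of contents (web-page navigation text, not part of the code)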