def __init__(self, txt, seq_len=5):
    """Index the character sequences of txt and the characters after them.

    txt = original text
    seq_len = sequence length ; 3 to 6 give the best results

    Attributes set on the instance:
    followers = dictionary mapping each recorded sequence of seq_len
                characters to the list of characters that follow it in
                txt (one entry per occurrence, in order of appearance)
    starts = list of the recorded sequences whose first character is an
             ASCII uppercase letter (or of all recorded sequences when
             there is none), each repeated once per recorded occurrence
    """
    # Kept as a plain dict (not a defaultdict) so that looking up a
    # sequence that was never recorded still raises KeyError.
    self.followers = {}
    # NOTE: the scan stops 2*seq_len characters before the end of txt, so
    # the last windows of the text are not recorded.
    for i in range(len(txt) - 2 * seq_len):
        sequence = txt[i:i + seq_len]  # sequence of seq_len characters
        next_char = txt[i + seq_len]  # the character following it
        self.followers.setdefault(sequence, []).append(next_char)

    # sequences that start with an uppercase letter
    starts = [key for key in self.followers
              if key[0] in string.ascii_uppercase]
    if not starts:  # just in case... fall back to every recorded sequence
        starts = list(self.followers)

    # Build a distribution of these sequences with the same frequency as
    # in the original text: one copy of the key per recorded follower.
    self.starts = []
    for key in starts:
        self.starts.extend([key] * len(self.followers[key]))
评论列表
文章目录