recipe-577988.py 文件源码-python代码片段

recipe-577988.py 文件源码
python
阅读 39 收藏 0 点赞 0 评论 0
def __init__(self,txt,seq_len=5):
        """Index txt by fixed-length character sequences.

        txt = original text
        seq_len = sequence length ; 3 to 6 give the best results

        Sets self.followers (sequence -> list of the characters that came
        right after it in txt, repeats kept) and self.starts (candidate
        opening sequences, each repeated once per recorded follower)."""
        # Bind the attribute first, then fill it through a local alias.
        self.followers = table = {}
        # NOTE(review): the bound stops 2*seq_len short of the end, so the
        # last seq_len windows that still have a next character are not
        # recorded -- confirm this margin is intended.
        for pos in range(len(txt) - 2*seq_len):
            cut = pos + seq_len
            # txt[pos:cut] is the window, txt[cut] the character after it
            table.setdefault(txt[pos:cut], []).append(txt[cut])

        # Prefer windows whose first character is an ASCII capital; when
        # there are none, every recorded window is a candidate.
        openers = [window for window in table
            if window[0] in string.ascii_uppercase]
        if len(openers) == 0:
            openers = list(table)

        # Repeat each opener once per follower so the list mirrors how
        # often that window was recorded from txt.
        self.starts = weighted = []
        for window in openers:
            weighted.extend([window] * len(table[window]))