def train(self, chain_len=None):
    """Rebuild the Markov corpus from the stored input texts.

    Every text in ``self.everything['input']`` is split into sentences and
    each sentence into word tokens. For each chain produced by
    ``self._make_chains`` the space-joined leading words become the key and
    the final word is appended to that key's list of observed successors.

    Any previously trained corpus is discarded: ``self.everything['corpus']``
    is replaced with a fresh dict, which is also exposed as ``self.corpus``.

    Args:
        chain_len: Chain length to train with. Any falsy value (``None``,
            ``0``) falls back to ``self.CHAIN_LENGTH``. The chosen value is
            stored on ``self.CHAIN_LEN``.
    """
    if not chain_len:
        chain_len = self.CHAIN_LENGTH
    # Stored on the instance; presumably read by _make_chains -- confirm
    # against that helper, which is not visible from this method.
    self.CHAIN_LEN = chain_len

    self.everything['corpus'] = {}
    self.corpus = self.everything['corpus']

    for name in self.everything['input']:
        for sentence in sent_tokenize(self.everything['input'][name]):
            words = word_tokenize(sentence)
            for chain in self._make_chains(words):
                key = " ".join(chain[:-1])  # everything but the last word
                successor = chain[-1]
                # setdefault handles only the "key not seen yet" case; the
                # former bare ``except:`` also swallowed unrelated errors
                # (including KeyboardInterrupt) and reset the list.
                self.corpus.setdefault(key, []).append(successor)
评论列表
文章目录