def make_phrases(self, start=1, end=None):
    """Collect space-joined phrases for each chain length in range(start, end).

    For every chain length, ``self.phrases[chain_len]`` is reset to a new
    list. Each value in ``self.everything['input']`` is split with
    ``sent_tokenize``, each resulting line with ``word_tokenize``, and every
    chain yielded by ``self._make_chains(words, chain_len)`` is turned into
    one phrase string appended to that list.

    Args:
        start: First chain length to process.
        end: Exclusive upper bound on the chain length. A falsy value
            (``None`` or ``0``) means "process only ``start``".

    Returns:
        A ``Counter`` over the phrases collected for the last chain length
        processed, or an empty ``Counter`` when ``range(start, end)`` is
        empty (previously this case raised ``NameError``).

    Raises:
        SystemExit: via ``sys.exit(-1)`` when a chain cannot be joined into
            a string (after writing an ``ERROR.2:`` line to stdout).
    """
    if not end:
        end = start + 1

    sources = self.everything['input']
    collected = []  # stays empty when the range below yields nothing

    for chain_len in range(start, end):  # ``end`` itself is excluded
        collected = []
        self.phrases[chain_len] = collected
        for name in sources:
            for line in sent_tokenize(sources[name]):
                words = word_tokenize(line)
                for chain in self._make_chains(words, chain_len):
                    try:
                        # The last item of a chain is the Markov "value",
                        # not part of the phrase itself.
                        chain = chain[:-1]
                        # str.join() rejects None, so filter it out first.
                        chain = [c for c in chain if c is not None]
                    except Exception:
                        # Best effort: report and still attempt the join
                        # below with the chain as it currently stands.
                        sys.stdout.write("ERROR.1: %s\n" % (chain,))
                    try:
                        collected.append(" ".join(chain))
                    except Exception:
                        sys.stdout.write("ERROR.2: %s\n" % (chain,))
                        sys.exit(-1)

    return Counter(collected)
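# [web-page residue, not part of the program] Comment list
# [web-page residue, not part of the program] Table of contents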