def load_vocab(self, vocab):
    """Populate ``self.vocab`` and ``self.max_length``, preferring cached copies.

    The cached values are read with ``self.load`` from ``model/vocab.pkl``
    and ``model/max_length.pkl``.  If both are present (not ``None``) they
    are kept and nothing else happens.  Otherwise the vocabulary is rebuilt
    from ``vocab`` and both values are written back with ``self.save_model``.

    The rebuilt ``self.vocab`` is a three-level mapping::

        self.vocab[space_count][first_token][phrase] = phrase_with_underscores

    where ``space_count`` is the number of ``u' '`` characters in the phrase
    and ``first_token`` is its first whitespace-separated token.

    Args:
        vocab: Passed unchanged to ``utils.load_data2list_string``
            (presumably a path to the raw vocabulary file -- confirm against
            that helper).

    Raises:
        KeyError: If a phrase's space count is not in
            ``1..self.max_length`` (no bucket exists for it).
    """
    self.vocab = self.load('model/vocab.pkl')
    self.max_length = self.load('model/max_length.pkl')
    if self.vocab is not None and self.max_length is not None:
        return

    phrases, self.max_length = utils.load_data2list_string(vocab)
    # Drop single-token entries; only multi-syllable words are indexed.
    # A real list (not a lazy filter object) is needed for len() below.
    phrases = [phrase for phrase in phrases if len(phrase.split()) > 1]

    self.vocab = {i: {} for i in range(1, self.max_length + 1)}
    for phrase in phrases:
        first_token = phrase.split()[0]
        space_count = phrase.count(u' ')
        # The bucket lookup is deliberately a plain index so an unexpected
        # space count still fails loudly; only the per-first-token dict is
        # created on demand.
        by_first_token = self.vocab[space_count]
        by_first_token.setdefault(first_token, {})[phrase] = phrase.replace(u' ', u'_')

    print('size of vocab = %d' % (len(phrases)))
    self.save_model(self.vocab, 'model/vocab.pkl')
    self.save_model(self.max_length, 'model/max_length.pkl')
# Comment list
# Article table of contents