def prep_data(self):
    """Load the data splits, fit the word indexer, and pad every split.

    Side effects on ``self``:
      * ``train``, ``validation``, ``test`` -- first set to whatever
        ``self.load_data()`` returns, then each replaced by a 3-tuple
        ``(padded[0], padded[1], original[2])``.
      * ``indexer`` -- a ``Tokenizer`` fitted on the first two elements of
        the training split only.
      * ``Vocab`` -- number of distinct words seen by the indexer, plus one.

    Reads ``self.SentMaxLen`` as the target length for every padded sequence.
    """
    # Step 1: fetch the raw splits from the loader.
    self.train, self.validation, self.test = self.load_data()

    # Step 2: build the word -> integer index from the training texts.
    # Case is preserved and no characters are filtered out.
    tokenizer = Tokenizer(lower=False, filters='')
    self.indexer = tokenizer
    tokenizer.fit_on_texts(self.train[0] + self.train[1])
    # NOTE(review): the +1 presumably reserves index 0 for padding -- confirm.
    self.Vocab = len(tokenizer.word_counts) + 1

    # Step 3: map each text to its integer sequence and pad to SentMaxLen.
    def encode(texts):
        max_len = self.SentMaxLen
        return pad_sequences(
            sequences=self.indexer.texts_to_sequences(texts),
            maxlen=max_len,
        )

    # Elements 0 and 1 of a split are encoded; element 2 is passed through
    # unchanged (presumably the labels -- verify against load_data).
    for split_name in ("train", "validation", "test"):
        split = getattr(self, split_name)
        setattr(
            self,
            split_name,
            (encode(split[0]), encode(split[1]), split[2]),
        )
评论列表
文章目录