def create_batch(self, sentence_li):
    """Create a batch of embedding sequences for a list of sentences.

    Each input text is split with ``sent_tokenize`` and then
    ``word_tokenize``; empty tokens are dropped. Every remaining token is
    looked up in ``self.embdict.tok2emb``. Sequences shorter than
    ``self.max_sequence_length`` are left-padded with zero vectors of size
    ``self.embedding_dim``; longer sequences keep only their last
    ``self.max_sequence_length`` tokens.

    Args:
        sentence_li: Iterable of text strings to embed.

    Returns:
        The result of ``np.asarray`` over one per-text array. Assuming every
        stored embedding has length ``self.embedding_dim``, the shape is
        ``(len(sentence_li), max_sequence_length, embedding_dim)``.
    """
    max_len = self.max_sequence_length
    emb_dim = self.embedding_dim
    tok2emb = self.embdict.tok2emb

    embeddings_batch = []
    for sen in sentence_li:
        tokens = [
            tok
            for sent in sent_tokenize(sen)
            for tok in word_tokenize(sent)
            if tok != ''
        ]
        # Keep only the trailing window. Slicing from an explicit start index
        # (instead of ``[-max_len:]``) stays correct when max_len is 0, where
        # ``[-0:]`` would keep the whole list.
        tokens = tokens[max(len(tokens) - max_len, 0):]

        embeddings = []
        for tok in tokens:
            emb = tok2emb.get(tok)
            if emb is None:
                # A token without a stored embedding would otherwise put a
                # ``None`` among the vectors and break the array conversion;
                # use the same zero vector that padding uses.
                emb = np.zeros(emb_dim)
            embeddings.append(emb)

        # Left-pad so that the real tokens sit at the end of the sequence.
        pads = [np.zeros(emb_dim) for _ in range(max_len - len(tokens))]
        embeddings_batch.append(np.asarray(pads + embeddings))

    return np.asarray(embeddings_batch)
评论列表
文章目录