def optimize_glove(glove_path, vocab):
    """Trim down GloVe embeddings to use only words in the data.

    Args:
        glove_path: Path to a text embedding file with one entry per line,
            written as ``word v1 v2 ... vN`` with fields separated by a
            single space.
        vocab: Iterable of words to keep.

    Returns:
        A ``(seen_vocab, matrix)`` tuple. ``seen_vocab`` is the list of words
        from ``vocab`` that occur in the file, in file order. ``matrix`` is a
        float32 array with one row per entry of ``seen_vocab``. When none of
        the words occur in the file, ``seen_vocab`` is empty and ``matrix``
        has shape ``(0, 0)``.

    Raises:
        ValueError: If a kept line contains a field that cannot be parsed as
            a float, or if kept lines have differing numbers of fields.
    """
    vocab_set = frozenset(vocab)
    seen_vocab = []
    rows = []
    # Decode as UTF-8 explicitly rather than relying on the platform default
    # encoding, which is not UTF-8 everywhere.
    with open(glove_path, encoding="utf-8") as f:
        for line in f:
            # Split on a literal space only: bare split() also splits on any
            # other whitespace, which (per the original note) fails on tokens
            # such as ". . .".
            word, *embed = line.strip().split(' ')
            if word in vocab_set:
                rows.append(np.array(embed, dtype=np.float32))
                seen_vocab.append(word)
    if not rows:
        # Stacking an empty list raises; hand back an empty matrix instead.
        return seen_vocab, np.empty((0, 0), dtype=np.float32)
    # np.vstack is the supported spelling of the deprecated np.row_stack.
    return seen_vocab, np.vstack(rows)
评论列表
文章目录