def load_word_vectors(embeddings_path):
    """Load a vocabulary and its word-vector tensor for an embeddings prefix.

    ``embeddings_path`` is a path *prefix*; the function appends the
    extensions ``.pth``, ``.vocab``, ``.vec`` and ``.model`` to it.

    Lookup order:

    1. If both ``<prefix>.pth`` and ``<prefix>.vocab`` exist, the tensor is
       read with ``torch.load`` and the vocabulary is built from the
       ``.vocab`` file.
    2. Otherwise an embedding model is loaded from ``<prefix>.vec``
       (word2vec text format) or, failing that, from ``<prefix>.model``.
       Its tokens are written one per line to ``<prefix>.vocab`` and its
       vectors are copied into a new tensor.

    Args:
        embeddings_path: Path prefix (without extension) of the embedding
            files.

    Returns:
        A ``(vocab, vectors)`` tuple. On the rebuild path ``vectors`` is a
        tensor with one row per token and ``model.vector_size`` columns.

    Raises:
        FileNotFoundError: If none of the expected embedding files exist.
    """
    tensor_path = embeddings_path + '.pth'
    vocab_path = embeddings_path + '.vocab'

    if os.path.isfile(tensor_path) and os.path.isfile(vocab_path):
        print('==> File found, loading to memory')
        vectors = torch.load(tensor_path)
        vocab = Vocab(filename=vocab_path)
        return vocab, vectors

    vec_path = embeddings_path + '.vec'
    model_path = embeddings_path + '.model'
    # '.vec' takes precedence when both files exist; checking it first avoids
    # loading the '.model' file only to throw it away.
    if os.path.isfile(vec_path):
        model = FastText.load_word2vec_format(vec_path)
    elif os.path.isfile(model_path):
        model = KeyedVectors.load(model_path)
    else:
        # Fail with a clear message instead of an UnboundLocalError on `model`.
        raise FileNotFoundError(
            "No embeddings found for prefix {!r}: expected '.pth' + '.vocab', "
            "'.vec' or '.model'".format(embeddings_path)
        )

    # Materialise the keys once so the vocab file and the tensor rows are
    # guaranteed to follow the same order.
    tokens = list(model.vocab.keys())

    with open(vocab_path, 'w', encoding='utf-8') as f:
        f.writelines(token + '\n' for token in tokens)
    vocab = Vocab(filename=vocab_path)

    # Row i holds the vector of the i-th token written to the vocab file.
    # NOTE(review): presumably Vocab assigns indices in file order - confirm.
    vectors = torch.zeros(len(tokens), model.vector_size)
    for index, word in enumerate(tokens):
        vectors[index, :] = torch.from_numpy(model[word])

    # NOTE(review): nothing here writes '<prefix>.pth', so the cache branch
    # above is only taken if that file is saved elsewhere - confirm.
    return vocab, vectors
评论列表
文章目录