# Module-level imports required by this method.
import os

import numpy
import progressbar


def __load_embeding_model(self, file_path, max_vocab_size=100000):
    """Load word vectors from a plain-text embeddings file into self.__embed_vectors."""
    self.__embed_vectors = dict()
    if not file_path:
        print('Embeddings file not provided')
        return
    if not os.path.exists(file_path):
        print('Embeddings file not found:', file_path)
        return
    print('Loading the embedding model from:', file_path)
    bar = progressbar.ProgressBar(max_value=max_vocab_size)
    with open(file_path, "r") as embed_f:
        for line in embed_f:
            try:
                tab = line.rstrip().split()
                word = tab[0].lower()
                if word not in self.__embed_vectors:
                    vec = numpy.array(tab[1:], dtype=float)
                    self.__embed_vectors[word] = vec
            except ValueError:
                # Skip malformed lines (e.g. non-numeric vector components).
                continue
            bar.update(len(self.__embed_vectors))
            if len(self.__embed_vectors) == max_vocab_size:
                bar.finish()
                return
    # Finish the progress bar even if the file holds fewer than max_vocab_size words.
    bar.finish()
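# Minimal sketch of the parsing step above on a single line, assuming a GloVe-style
# plain-text layout (one token followed by its space-separated float components per
# line). The sample line and values are hypothetical and only illustrate the format.
import numpy

sample_line = "cat 0.23088 0.28283 0.6318 -0.59411\n"
tab = sample_line.rstrip().split()
word = tab[0].lower()                     # 'cat' becomes the dictionary key
vec = numpy.array(tab[1:], dtype=float)   # remaining fields become the vector
print(word, vec.shape)                    # cat (4,)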