def load_embeddings(self, filename, xz=False):
    """Initialise word-lookup rows from a whitespace-separated embedding file.

    Each non-blank line is read as ``<word> <v1> <v2> ...``. The word is
    passed through ``strong_normalize``; if the result is in ``self._vocab``
    the remaining fields are parsed as floats and handed to
    ``self._word_lookup.init_row(self._vocab[word], vec)``.

    Args:
        filename: Path of the embedding file.
        xz: When true, the file is opened with ``lzma.open``; otherwise
            with the built-in ``open``. Both are decoded as UTF-8 and
            undecodable bytes are dropped.

    Returns:
        ``self``, so calls can be chained. If ``filename`` is not an
        existing file a message is printed and nothing is loaded.

    Raises:
        ValueError: If a vector component of an in-vocabulary word is not
            a valid float.
    """
    if not os.path.isfile(filename):
        print(filename, "does not exist")
        return self

    # Decode both variants identically; previously the plain-text branch
    # relied on the platform's default encoding.
    opener = lzma.open if xz else open
    found_set = set()

    # The context manager closes the file even if a line fails to parse.
    with opener(filename, "rt", encoding="utf-8", errors="ignore") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank / whitespace-only line: nothing to load.
                continue
            word = strong_normalize(fields[0])
            if word not in self._vocab:
                continue
            # Parse the vector only for hits; most lines are usually misses.
            vec = [float(x) for x in fields[1:]]
            found_set.add(word)
            self._word_lookup.init_row(self._vocab[word], vec)

    print("Loaded embeddings from", filename)
    print(len(found_set), "hits with vocab size of", len(self._vocab))
    return self
评论列表
文章目录