def get_word_vectors(dim=None, size=None, filename=None):
    """Return the vocabulary's word vectors together with their length.

    The vocabulary is taken from ``get_nlp().vocab``.

    NOTE(review): ``resize_vectors`` / ``load_vectors`` / ``vectors_length``
    look like the spaCy 1.x ``Vocab`` API -- confirm against the installed
    version.

    Args:
        dim: If given (and no ``filename`` is supplied) and smaller than the
            vocabulary's current ``vectors_length``, the vectors are resized
            to this length. Ignored when ``filename`` is provided.
        size: Maximum number of entries to return; lexemes are ranked by
            ``prob`` in descending order before truncation. ``None`` keeps
            all lexemes that have a vector.
        filename: Optional path to a UTF-8 text file of vectors, which is
            handed to ``vocab.load_vectors``. If its first line consists of
            exactly two all-digit tokens it is treated as a header and the
            second number is passed to ``vocab.resize_vectors``
            (presumably "<count> <dimension>" -- verify against the file
            format in use).

    Returns:
        A tuple ``(vectors, length)`` where ``vectors`` maps each selected
        lexeme's ``orth_`` to its ``vector`` and ``length`` is
        ``vocab.vectors_length`` after any loading/resizing.

    Raises:
        IOError: If an ``OSError`` occurs while opening or loading
            ``filename``; the original error is chained as the cause.
    """
    vocabulary = get_nlp().vocab

    if filename is None:
        # No file to load: only shrink the existing vectors when asked to.
        if dim is not None and dim < vocabulary.vectors_length:
            vocabulary.resize_vectors(dim)
    else:
        print("Loading word vectors from '%s'..." % filename)
        try:
            with open(filename, encoding="utf-8") as stream:
                header_tokens = stream.readline().split()
                has_header = (
                    len(header_tokens) == 2
                    and all(token.isdigit() for token in header_tokens)
                )
                if has_header:
                    vocabulary.resize_vectors(int(header_tokens[1]))
                else:
                    # The first line is already a vector rather than a
                    # header, so rewind and let load_vectors read it.
                    stream.seek(0)
                vocabulary.load_vectors(stream)
        except OSError as err:
            raise IOError("Failed loading word vectors from '%s'" % filename) from err

    # Rank lexemes that carry a vector by descending probability and keep
    # at most `size` of them (a `None` size keeps everything).
    ranked = [lexeme for lexeme in vocabulary if lexeme.has_vector]
    ranked.sort(key=attrgetter("prob"), reverse=True)
    selected = ranked[:size]

    vectors = {lexeme.orth_: lexeme.vector for lexeme in selected}
    return vectors, vocabulary.vectors_length
评论列表
文章目录