textutil.py 文件源码-python代码片段

textutil.py 文件源码
python
阅读 34 收藏 0 点赞 0 评论 0
def get_word_vectors(dim=None, size=None, filename=None):
    """Return the vocabulary's word vectors, optionally loading them from a file.

    The vocabulary is taken from ``get_nlp().vocab``.

    Args:
        dim: If given and smaller than the vocabulary's current
            ``vectors_length``, the vectors are resized to this length.
            Only consulted when ``filename`` is None.
        size: Upper bound of the slice applied to the sorted lexemes;
            ``None`` keeps every lexeme that has a vector.
        filename: Path of a UTF-8 text file to load vectors from. If the
            first line consists of exactly two integers it is treated as a
            header and the second integer is used as the vector length;
            otherwise the file is handed to ``vocab.load_vectors`` from the
            beginning.

    Returns:
        A tuple ``(vectors, vectors_length)`` where ``vectors`` maps each
        selected lexeme's ``orth_`` to its ``vector`` and ``vectors_length``
        is ``vocab.vectors_length`` after any loading/resizing.

    Raises:
        IOError: If an ``OSError`` occurs while opening or loading the file.
    """
    vocab = get_nlp().vocab
    if filename is not None:
        print("Loading word vectors from '%s'..." % filename)
        try:
            with open(filename, encoding="utf-8") as f:
                header = f.readline().split()
                # isdecimal() instead of isdigit(): isdigit() also accepts
                # characters such as superscript digits that int() cannot
                # parse, which would raise an uncaught ValueError below.
                if len(header) == 2 and all(field.isdecimal() for field in header):
                    vocab.resize_vectors(int(header[1]))
                else:
                    # First line is already a vector and not a header,
                    # so rewind and let load_vectors read it.
                    f.seek(0)
                vocab.load_vectors(f)
        except OSError as e:
            raise IOError("Failed loading word vectors from '%s'" % filename) from e
    elif dim is not None and dim < vocab.vectors_length:
        vocab.resize_vectors(dim)
    # Order lexemes by their `prob` attribute, highest first, then keep at
    # most `size` of them (a None bound keeps all).
    lexemes = sorted(
        (lex for lex in vocab if lex.has_vector),
        key=attrgetter("prob"),
        reverse=True,
    )[:size]
    return {lex.orth_: lex.vector for lex in lexemes}, vocab.vectors_length