def _readTxt(fname):
    '''Read a text-format word embedding file.

    The first line of the file is a header holding two whitespace-separated
    integers: the number of words and the vector dimensionality.  Every
    following non-blank line holds a word followed by its vector components,
    all separated by whitespace.

    Args:
        fname: Path of the UTF-8 encoded embeddings file.

    Returns:
        A ``(words, vectors)`` tuple.  ``words`` is a list of strings and
        ``vectors`` is a parallel list of 1-D numpy arrays, one per word.

    Raises:
        AssertionError: If the number of words read, or the length of any
            vector, disagrees with the header.
        ValueError: If the header does not hold exactly two integers, or a
            vector component cannot be parsed as a float.
    '''
    words, vectors = [], []
    # The context manager closes the file even when parsing fails part-way.
    with codecs.open(fname, 'r', 'utf-8') as hook:
        # get summary info about vectors file
        (numWords, dim) = (int(s) for s in hook.readline().split())
        for line in hook:
            chunks = line.split()
            if not chunks:
                # Blank line (commonly a trailing newline at end of file):
                # nothing to parse, and chunks[0] would raise IndexError.
                continue
            words.append(chunks[0])
            vectors.append(np.array([float(n) for n in chunks[1:]]))

    # Raised explicitly instead of via ``assert`` so the checks still run
    # under ``python -O``; the exception type is kept for existing callers.
    if len(words) != numWords:
        raise AssertionError(
            'expected %d words, read %d' % (numWords, len(words)))
    for word, v in zip(words, vectors):
        if len(v) != dim:
            raise AssertionError(
                'expected %d dimensions for %r, read %d' % (dim, word, len(v)))
    return (words, vectors)
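# 评论列表 (comment list -- web page navigation residue, not part of the code)
# 文章目录 (table of contents -- web page navigation residue, not part of the code)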