def glove(data_fname='glove.840B.300d.txt', out_fname='glove.pkl'):
    """Process raw dependency GloVe data from Socher '13.

    Reads ``DATA_DIR + data_fname`` line by line. Every non-blank line is
    split on whitespace: the first token is taken as the word and the
    remaining tokens as the components of its embedding vector. The result
    is pickled to ``DATA_DIR + out_fname`` as a ``(words, U)`` tuple, where
    ``words`` lists the words in file order and ``U`` is a 2-D float array
    with one row per word.

    Args:
        data_fname: Name of the raw embedding text file, appended to DATA_DIR.
        out_fname: Name of the output pickle file, appended to DATA_DIR.

    Raises:
        Exception: If a line's vector length differs from that of the first
            non-blank line; the message is
            ``"<line index>: <expected>!=<found>"``.
        ValueError: If a vector component cannot be parsed as a float.
    """
    words, rows, dim = [], [], None
    with open(DATA_DIR + data_fname, 'rb') as f:
        for j, line in enumerate(f):
            tokens = line.strip().split()
            if not tokens:
                # Blank lines carry no entry; indexing tokens[0] would fail.
                continue
            # NOTE(review): splitting on any whitespace means a word that
            # itself contains spaces shows up as extra vector components and
            # trips the dimension check below.
            d = len(tokens) - 1
            if dim is None:
                dim = d
            elif d != dim:
                raise Exception('{0}: {1}!={2}'.format(j, dim, d))
            words.append(tokens[0])
            # Parse the components as numbers; keeping the raw tokens would
            # make the pickled matrix an array of strings.
            rows.append(np.array([float(t) for t in tokens[1:]]))
    # Force a 2-D shape so the summary below also works for an empty file.
    U = np.array(rows, dtype=float).reshape(len(words), dim or 0)
    # Parenthesised single-argument print works on both Python 2 and 3.
    print("Found {0} words".format(len(words)))
    print("Found {0}x{1} embedding matrix".format(*U.shape))
    with open(DATA_DIR + out_fname, 'wb') as f:
        cPickle.dump((words, U), f)
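# NOTE(review): trailing web-page navigation text, not part of the code:
#   "Comment list" / "Table of contents"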