def load_text_vec(alphabet, filename="", embedding_size=100):
    """Load vectors for the words in *alphabet* from a text embedding file.

    Each data line is expected to hold a word followed by its vector
    components, separated by single spaces.  A line with exactly two fields
    is treated as a ``<vocab_size> <embedding_size>`` header: it is reported
    and not stored.

    Args:
        alphabet: Container supporting ``in``; only words found in it are kept.
        filename: Path of the text embedding file to read.
        embedding_size: Value reported in the log when the file has no header
            line; a header line overrides it.  It does not affect the
            returned vectors.

    Returns:
        A dict mapping each retained word to its list of components.  The
        components are the raw string fields from the file (not converted
        to numbers).
    """
    vectors = {}
    with open(filename) as f:
        for line_no, line in enumerate(f, 1):
            # Progress report for large files.
            if line_no % 100000 == 0:
                print('epch %d' % line_no)

            stripped = line.strip()
            if not stripped:
                # A blank line would otherwise be read as the word '' with
                # an empty vector.
                continue

            items = stripped.split(' ')
            if len(items) == 2:
                # Header line: "<vocab_size> <embedding_size>".
                vocab_size, embedding_size = items
                print('%s %s' % (vocab_size, embedding_size))
                continue

            word = items[0]
            if word in alphabet:
                vectors[word] = items[1:]

    print('embedding_size %s' % (embedding_size,))
    print('done')
    print('words found in wor2vec embedding  %d' % len(vectors))
    return vectors
评论列表
文章目录