def get_pretrained_embeddings(filepath):
    """Load pretrained word embeddings from a whitespace-separated text file.

    Every non-blank line is split on whitespace: the first field is used as
    the token and the remaining fields are converted to floats to form its
    vector. Blank lines are skipped.

    After loading, vectors drawn from a Gaussian (mean 0, std 0.01) are
    stored under START_MARKER and END_MARKER, replacing any entry the file
    may have supplied for those keys. UNKNOWN_TOKEN gets such a vector only
    when the file did not provide one.

    Args:
        filepath: Path to the embeddings text file.

    Returns:
        A dict mapping each token to its vector as a list of floats.

    Raises:
        ValueError: If the file contains no embedding lines, or if a vector
            component cannot be parsed as a float.
    """
    embeddings = {}
    # The context manager closes the file; no explicit close() is needed.
    with open(filepath, 'r') as f:
        for line in f:
            info = line.strip().split()
            if not info:
                # A blank line has no token field, so there is nothing to store.
                continue
            embeddings[info[0]] = [float(r) for r in info[1:]]

    if not embeddings:
        raise ValueError('No embeddings found in {!r}'.format(filepath))

    # next(iter(...)) works whether values() is a list (Python 2) or a
    # view (Python 3); indexing values() directly only works on Python 2.
    embedding_size = len(next(iter(embeddings.values())))
    print('Embedding size={}'.format(embedding_size))

    def _random_vector():
        # Small Gaussian noise, one component per embedding dimension.
        return [random.gauss(0, 0.01) for _ in range(embedding_size)]

    # The sentence-boundary markers are always (re)initialised randomly.
    embeddings[START_MARKER] = _random_vector()
    embeddings[END_MARKER] = _random_vector()
    # Keep a pretrained unknown-token vector when the file provides one.
    if UNKNOWN_TOKEN not in embeddings:
        embeddings[UNKNOWN_TOKEN] = _random_vector()
    return embeddings
评论列表
文章目录