def Word2Vec_encode(texts, wemb):
    """Encode each text as the average of its in-vocabulary word embeddings.

    Args:
        texts: iterable of strings to encode.
        wemb: mapping word -> embedding vector of length ``prm.dim_emb``.

    Returns:
        float32 array of shape ``(len(texts), prm.dim_emb)``; a row whose
        text has no in-vocabulary words is left as all zeros (the divisor
        is clamped to 1 to avoid division by zero).
    """
    encoded = np.zeros((len(texts), prm.dim_emb), dtype=np.float32)
    for row, text in enumerate(texts):
        count = 0.
        for token in wordpunct_tokenize(text):
            if token in wemb:
                encoded[row, :] += wemb[token]
                count += 1.
        # Clamp to 1 so empty/OOV-only texts keep their zero row.
        encoded[row, :] /= max(1., count)
    return encoded
# NOTE(review): stray page text from a web scrape, commented out so the
# module stays importable — original lines: "评论列表" (comment list),
# "文章目录" (article table of contents).