sentiment.py 文件源码-python代码片段

sentiment.py 文件源码

python

阅读 29 收藏 0 点赞 0 评论 0

def load_embeddings(filename):
  """Loads embedings, returns weight matrix and dict from words to indices."""
  weight_vectors = []
  word_idx = {}
  with codecs.open(filename, encoding='utf-8') as f:
    for line in f:
      word, vec = line.split(u' ', 1)
      word_idx[word] = len(weight_vectors)
      weight_vectors.append(np.array(vec.split(), dtype=np.float32))
  # Annoying implementation detail; '(' and ')' are replaced by '-LRB-' and
  # '-RRB-' respectively in the parse-trees.
  word_idx[u'-LRB-'] = word_idx.pop(u'(')
  word_idx[u'-RRB-'] = word_idx.pop(u')')
  # Random embedding vector for unknown words.
  weight_vectors.append(np.random.uniform(
      -0.05, 0.05, weight_vectors[0].shape).astype(np.float32))
  return np.stack(weight_vectors), word_idx