pickle_emb.py 文件源码-python代码片段

pickle_emb.py 文件源码

python

阅读 21 收藏 0 点赞 0 评论 0

项目：sentiment_lstm 作者: wenjiesha 项目源码文件源码

def main(dict_path='../../data/imdb.dict.pkl',
         vectors_path='../../data/output.txt',
         out_path='../../data/imdb.emb.pkl',
         embedding_size=300):
  """Build an embedding matrix for the pickled vocabulary and pickle it.

  The vectors file is read as consecutive line pairs: a line holding a word,
  followed by a line holding that word's whitespace-separated float
  components. Words that are not keys of the dictionary are skipped; rows
  for dictionary words that never appear in the vectors file stay all-zero.

  Args:
    dict_path: Path of the pickled mapping from word to row index.
    vectors_path: Path of the text file holding the word/vector line pairs.
    out_path: Path the resulting float32 matrix is pickled to.
    embedding_size: Number of components per embedding vector.

  Raises:
    ValueError: If a vector line for a dictionary word is not numeric or
      does not contain exactly `embedding_size` values.
  """
  # NOTE: pickle.load can execute arbitrary code; only point this at a
  # dictionary file you trust.
  # Pickle data is binary, so the file must be opened in 'rb' mode (text
  # mode fails on Python 3); the `with` block closes the handle promptly.
  with open(dict_path, 'rb') as dict_file:
    dictionary = pickle.load(dict_file)

  # Account for missing index 0 and 1.
  # (Presumably the dictionary's indices start at 2, leaving rows 0 and 1
  # reserved -- confirm against the code that builds the dictionary.)
  emb = np.zeros([len(dictionary) + 2, embedding_size], dtype=np.float32)
  with open(vectors_path, 'r') as vectors_file:
    while True:
      word = vectors_file.readline()
      if not word:
        # readline() returns '' only at end of file.
        break
      word = word.rstrip()
      # Always consume the paired vector line so the word/vector
      # alternation stays in sync, even when the word is skipped.
      vector_line = vectors_file.readline()
      if word in dictionary:
        # Parse only the vectors we keep; the rest of the file is ignored.
        values = [float(v) for v in vector_line.rstrip().split()]
        emb[int(dictionary[word]), :] = np.array(values, dtype=np.float32)

  # Close the output explicitly so the pickle is fully flushed to disk.
  with open(out_path, 'wb') as out_file:
    pickle.dump(emb, out_file)