def get_vocab(data):
    """Collect the distinct items in *data* and assign each a stable index.

    Args:
        data: An iterable of analogies. Each analogy is itself an iterable
            whose elements are added to the vocabulary individually, so the
            elements must be hashable and mutually orderable.
            NOTE(review): presumably each analogy is a tuple/list of word
            strings; a bare string would contribute its characters.

    Returns:
        A ``(item_to_index, vocab)`` pair, where ``vocab`` is the sorted list
        of distinct items and ``item_to_index`` maps every item to its
        position in ``vocab``.
    """
    vocab = set()
    for analogy in data:
        vocab.update(analogy)
    # Sorting makes the index assignment deterministic regardless of the
    # set's iteration order.
    vocab = sorted(vocab)
    return {item: index for index, item in enumerate(vocab)}, vocab