def build_onehots(self, vocab_size=None):
    """Build one-hot encodings of each sequence.

    Indexes an identity matrix with ``self.x_array`` and ``self.y_array``
    and stores the results in ``self.x_onehots`` and ``self.y_onehots``.
    Progress and elapsed time are reported on stderr.

    Args:
        vocab_size: Width of each one-hot vector. When truthy it is also
            stored in ``self.charsize``; when falsy (``None`` or ``0``)
            the existing ``self.charsize`` is used instead.
    """
    # If we're passed a charset size, great - if not, fall back to the
    # size already recorded on the instance.
    if vocab_size:
        self.charsize = vocab_size
        vocab = vocab_size
    else:
        vocab = self.charsize

    stderr.write("Constructing one-hot vector data...")
    stderr.flush()
    started = time.time()

    # These can be large, so we don't necessarily want them on the GPU;
    # thus they're plain numpy arrays, not Theano shared vars.
    # Build the identity matrix once: indexing it with an index array
    # yields a fresh copy each time, so both encodings can share the same
    # lookup table without aliasing each other.
    # NOTE(review): assumes x_array / y_array hold integer indices in
    # [0, vocab) - confirm against wherever they are built.
    identity = np.eye(vocab, dtype=th.config.floatX)
    self.x_onehots = identity[self.x_array]
    self.y_onehots = identity[self.y_array]

    finished = time.time()
    stderr.write("done!\nTook {0:.4f} ms.\n".format((finished - started) * 1000.0))
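# NOTE: stray web-page navigation text ("comment list", "article table of contents") was here; it is not part of the code.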