def format(self, word_seq, vocab_size, sequence_size):
    """Cut a word sequence into fixed-size windows paired with shifted targets.

    The sequence is divided into consecutive, non-overlapping windows of
    ``sequence_size`` elements. Each window is paired with the window that
    starts one position later, and that shifted window is one-hot encoded
    with ``to_categorical`` using ``vocab_size`` classes.

    Args:
        word_seq: Sliceable sequence supporting ``len()``. Presumably integer
            word ids smaller than ``vocab_size`` -- confirm against caller.
        vocab_size: Number of classes passed to ``to_categorical``.
        sequence_size: Number of elements per window.

    Returns:
        A ``(words, nexts)`` tuple of NumPy arrays: the input windows and
        the one-hot encoded shifted windows, in the same order.
    """
    # One element is held back so the last shifted window is still full length.
    window_count = (len(word_seq) - 1) // sequence_size
    offsets = [index * sequence_size for index in range(window_count)]

    inputs = [word_seq[begin:begin + sequence_size] for begin in offsets]
    targets = [
        # Same span as the input window, moved one position to the right.
        to_categorical(word_seq[begin + 1:begin + 1 + sequence_size], vocab_size)
        for begin in offsets
    ]
    return np.array(inputs), np.array(targets)
评论列表
文章目录