def load_imdb():
    """Load the full IMDB sentiment dataset as a binary bag-of-words matrix.

    Merges the Keras train and test splits into one dataset, restricts the
    vocabulary to the 1000 most frequent words, and vectorizes each review
    into a binary word-presence vector.

    Returns:
        x: float ndarray of shape (50000, 1000) — binary word indicators.
        y: int ndarray of shape (50000,) — sentiment labels (0 or 1).
    """
    # Local imports keep heavy dependencies out of module import time,
    # matching the existing style of this function.
    import numpy as np  # bug fix: np was used below but never imported
    from keras.preprocessing.text import Tokenizer
    from keras.datasets import imdb

    max_words = 1000
    print('Loading data...')
    (x1, y1), (x2, y2) = imdb.load_data(num_words=max_words)
    # Merge the predefined train/test splits into a single corpus.
    x = np.concatenate((x1, x2))
    y = np.concatenate((y1, y2))
    print(len(x), 'sequences')  # fixed message: this is train + test combined
    num_classes = np.max(y) + 1
    print(num_classes, 'classes')
    print('Vectorizing sequence data...')
    # Binary mode marks word presence/absence rather than counts.
    tokenizer = Tokenizer(num_words=max_words)
    x = tokenizer.sequences_to_matrix(x, mode='binary')
    print('x shape:', x.shape)  # fixed message: merged matrix, not just train
    return x.astype(float), y
# Comment list (评论列表)
# Article table of contents (文章目录)