def _load_pickle(path):
    """Unpickle and return the object stored at *path*, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; only point this at trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def loadTestData(folderName):
    """Load the test split and turn it into padded index sequences plus labels.

    Reads the tab-separated file ``data/test_datum.txt``, the pickled lemma
    lists ``test_lemmas_c`` / ``test_lemmas_q`` / ``test_lemmas_a`` and the
    pickled tokenizer ``structures/tokenizer``. Every path is built by plain
    string concatenation with ``folderName``, so ``folderName`` is used as a
    raw prefix (it must already end with a path separator when it names a
    directory).

    Args:
        folderName: Path prefix under which the files listed above live.

    Returns:
        A list ``[data_c, data_q, data_a, labels, data_train]`` where the
        first three items are the outputs of ``pad_sequences`` for the ``c``,
        ``q`` and ``a`` texts, ``labels`` is the output of ``to_categorical``
        applied to the ``value`` column, and ``data_train`` is the DataFrame
        read from ``test_datum.txt``.
    """
    # NOTE(review): `error_bad_lines` was deprecated in pandas 1.3 and removed
    # in 2.0 (replacement: on_bad_lines='skip'). Kept unchanged here because
    # the pandas version this file targets is not visible -- confirm.
    data_train = pd.read_csv(folderName + 'data/test_datum.txt', sep='\t', error_bad_lines=False)

    # One label per row, taken from the `value` column.
    labels = data_train.value.tolist()

    # Load through the helper so each file handle is closed deterministically
    # (the bare `pickle.load(open(...))` form leaked the handles).
    texts_c3 = _load_pickle(folderName + 'test_lemmas_c')
    texts_q3 = _load_pickle(folderName + 'test_lemmas_q')
    texts_a3 = _load_pickle(folderName + 'test_lemmas_a')
    tokenizer = _load_pickle(folderName + 'structures/tokenizer')

    sequences_q = tokenizer.texts_to_sequences(texts_q3)
    sequences_a = tokenizer.texts_to_sequences(texts_a3)
    sequences_c = tokenizer.texts_to_sequences(texts_c3)

    word_index = tokenizer.word_index
    print('Found %s unique tokens.' % len(word_index))

    # The MAX_SEQUENCE_LENGTH_* limits are defined outside this function.
    data_q = pad_sequences(sequences_q, maxlen=MAX_SEQUENCE_LENGTH_Q)
    data_a = pad_sequences(sequences_a, maxlen=MAX_SEQUENCE_LENGTH_A)
    data_c = pad_sequences(sequences_c, maxlen=MAX_SEQUENCE_LENGTH_C)

    labels = to_categorical(np.asarray(labels))
    print('Shape of label tensor:', labels.shape)

    return [data_c, data_q, data_a, labels, data_train]
评论列表
文章目录