def test(self):
ckpt = tf.train.get_checkpoint_state(self.model_dir)
if ckpt and ckpt.model_checkpoint_path:
self.saver.restore(self.sess, ckpt.model_checkpoint_path)
else:
print("...no checkpoint found...")
if self.isInteractive:
self.interactive()
else:
testS, testQ, testA = vectorize_data(
self.testData, self.word_idx, self.sentence_size, self.batch_size, self.n_cand, self.memory_size)
n_test = len(testS)
print("Testing Size", n_test)
test_preds = self.batch_predict(testS, testQ, n_test)
test_acc = metrics.accuracy_score(test_preds, testA)
print("Testing Accuracy:", test_acc)
Python vectorize_data() usage examples
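Every snippet on this page calls vectorize_data, but the helper itself never appears. As a point of reference, here is a minimal sketch of what the six-argument dialog form typically looks like in MemN2N dialog codebases: each sentence is padded with the nil index 0 to sentence_size, only the most recent memory_size sentences are kept, and the answer passes through as a candidate index. This is an assumption about the missing helper, not the original implementation; batch_size and n_cand are accepted here only for signature compatibility.

import numpy as np

def vectorize_data(data, word_idx, sentence_size, batch_size, n_cand, memory_size):
    # Hypothetical sketch of the dialog-style helper used throughout this page.
    S, Q, A = [], [], []
    for story, query, answer in data:
        ss = []
        for sentence in story:
            pad = max(0, sentence_size - len(sentence))
            ss.append([word_idx.get(w, 0) for w in sentence] + [0] * pad)
        ss = ss[-memory_size:]  # keep only the most recent sentences
        for _ in range(memory_size - len(ss)):
            ss.append([0] * sentence_size)  # pad the memory with empty sentences
        lq = max(0, sentence_size - len(query))
        q = [word_idx.get(w, 0) for w in query] + [0] * lq
        S.append(np.array(ss))
        Q.append(np.array(q))
        A.append(answer)  # the answer is already a candidate index (-1 when unknown)
    return S, Q, A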
def test(self):
ckpt = tf.train.get_checkpoint_state(self.model_dir)
if ckpt and ckpt.model_checkpoint_path:
self.saver.restore(self.sess, ckpt.model_checkpoint_path)
else:
print("...no checkpoint found...")
if self.isInteractive:
self.interactive()
else:
testP, testS, testQ, testA = vectorize_data(self.testData, self.word_idx, self.sentence_size, self.batch_size, self.n_cand, self.memory_size)
n_test = len(testS)
print("Testing Size", n_test)
        test_preds = self.batch_predict(testP, testS, testQ, n_test)
test_acc = metrics.accuracy_score(test_preds, testA)
print("Testing Accuracy:", test_acc)
# print(testA)
# for pred in test_preds:
# print(pred, self.indx2candid[pred])
def test(self):
ckpt = tf.train.get_checkpoint_state(self.model_dir)
if ckpt and ckpt.model_checkpoint_path:
self.saver.restore(self.sess, ckpt.model_checkpoint_path)
else:
print("...no checkpoint found...")
if self.isInteractive:
self.interactive()
else:
testS, testQ, testA = vectorize_data(self.testData, self.word_idx, self.sentence_size, self.batch_size, self.n_cand, self.memory_size)
n_test = len(testS)
print("Testing Size", n_test)
        test_preds = self.batch_predict(testS, testQ, n_test)
test_acc = metrics.accuracy_score(test_preds, testA)
print("Testing Accuracy:", test_acc)
# print(testA)
# for pred in test_preds:
# print(pred, self.indx2candid[pred])
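batch_predict is another helper these snippets rely on without showing. A minimal sketch, assuming self.model.predict(s, q) returns one candidate index per example and that self.batch_size is set on the instance:

import numpy as np

def batch_predict(self, S, Q, n):
    # Hypothetical: run the model over fixed-size slices and concatenate
    # the per-batch predictions into one label array.
    preds = []
    for start in range(0, n, self.batch_size):
        end = start + self.batch_size
        preds += list(self.model.predict(S[start:end], Q[start:end]))
    return np.array(preds)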
def test(self):
ckpt = tf.train.get_checkpoint_state(self.model_dir)
if ckpt and ckpt.model_checkpoint_path:
self.saver.restore(self.sess, ckpt.model_checkpoint_path)
else:
print("...no checkpoint found...")
if self.isInteractive:
self.interactive()
else:
testP, testS, testQ, testA = vectorize_data(self.testData, self.word_idx, self.sentence_size, self.batch_size, self.n_cand, self.memory_size)
n_test = len(testS)
print("Testing Size", n_test)
        test_preds = self.batch_predict(testP, testS, testQ, n_test)
test_acc = metrics.accuracy_score(test_preds, testA)
print("Testing Accuracy:", test_acc)
# print(testA)
# for pred in test_preds:
# print(pred, self.indx2candid[pred])
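The profile-conditioned copies above thread an extra testP array through batch_predict. Under the same assumptions as the two-argument sketch earlier, the variant simply slices the profile batch alongside the stories and queries:

import numpy as np

def batch_predict(self, P, S, Q, n):
    # Hypothetical profile-aware variant of the helper sketched above.
    preds = []
    for start in range(0, n, self.batch_size):
        end = start + self.batch_size
        preds += list(self.model.predict(P[start:end], S[start:end], Q[start:end]))
    return np.array(preds)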
def load_data(self):
# single babi task
# TODO: refactor all this running elsewhere
# task data
train, test = load_task(data_dir, task_id)
    # builds the vocabulary from every story, query, and answer; on Python 3 this
    # needs `from functools import reduce` and `from itertools import chain`
    vocab = sorted(reduce(lambda x, y: x | y, (set(list(chain.from_iterable(s)) + q + a) for s, q, a in train + test)))
word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
self.memory_size = 50
self.max_story_size = max(map(len, (s for s, _, _ in train + test)))
    self.mean_story_size = int(np.mean([len(s) for s, _, _ in train + test]))  # list comprehension: np.mean over a map object fails on Python 3
self.sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in train + test)))
self.query_size = max(map(len, (q for _, q, _ in train + test)))
self.memory_size = min(self.memory_size, self.max_story_size)
self.vocab_size = len(word_idx) + 1 # +1 for nil word
self.sentence_size = max(self.query_size, self.sentence_size) # for the position
print("Longest sentence length", self.sentence_size)
print("Longest story length", self.max_story_size)
print("Average story length", self.mean_story_size)
# train/validation/test sets
self.S, self.Q, self.A = vectorize_data(train, word_idx, self.sentence_size, self.memory_size)
    # sklearn.cross_validation was removed; train_test_split now lives in sklearn.model_selection
    self.trainS, self.valS, self.trainQ, self.valQ, self.trainA, self.valA = model_selection.train_test_split(
        self.S, self.Q, self.A, test_size=.1)  # TODO: pass random_state for reproducibility
self.testS, self.testQ, self.testA = vectorize_data(test, word_idx, self.sentence_size, self.memory_size)
print(self.testS[0])
print("Training set shape", self.trainS.shape)
# params
self.n_train = self.trainS.shape[0]
self.n_test = self.testS.shape[0]
self.n_val = self.valS.shape[0]
print("Training Size", self.n_train)
print("Validation Size", self.n_val)
print("Testing Size", self.n_test)
def interactive(self):
context = []
u = None
r = None
nid = 1
while True:
        line = input('--> ').strip().lower()  # was raw_input, which only exists on Python 2
if line == 'exit':
break
if line == 'restart':
context = []
nid = 1
print("clear memory")
continue
u = tokenize(line)
data = [(context, u, -1)]
s, q, a = vectorize_data(
data, self.word_idx, self.sentence_size, self.batch_size, self.n_cand, self.memory_size)
preds = self.model.predict(s, q)
r = self.indx2candid[preds[0]]
print(r)
r = tokenize(r)
u.append('$u')
u.append('#' + str(nid))
r.append('$r')
r.append('#' + str(nid))
context.append(u)
context.append(r)
nid += 1
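Both interactive loops also lean on a tokenize helper that is not shown. A plausible sketch that splits on non-word characters and keeps punctuation as separate tokens (an assumption, not the original):

import re

def tokenize(sent):
    # 'hello, how are you?' -> ['hello', ',', 'how', 'are', 'you', '?']
    return [x.strip() for x in re.split(r'(\W+)', sent) if x.strip()]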
def interactive(self):
    # seed the memory with a profile sentence; note that restart switches the profile
    context = [['male', 'young', '$r', '#0']]
    # context = []
    u = None
    r = None
    nid = 1
    while True:
        line = input('--> ').strip().lower()
        if line == 'exit':
            break
        if line == 'restart':
            context = [['female', 'young', '$r', '#0']]
            # context = []
            nid = 1
            print("clear memory")
            continue
        u = tokenize(line)
        data = [(context, u, -1)]
        s, q, a = vectorize_data(
            data, self.word_idx, self.sentence_size, self.batch_size, self.n_cand, self.memory_size)
        preds = self.model.predict(s, q)
        r = self.indx2candid[preds[0]]
        print(r)
        r = tokenize(r)
        u.append('$u')
        u.append('#' + str(nid))
        r.append('$r')
        r.append('#' + str(nid))
        context.append(u)
        context.append(r)
        nid += 1
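The predicted index is mapped back to a response string through self.indx2candid. A sketch of how that lookup is typically built when the candidate responses are loaded; load_candidates and the attribute names here are assumptions:

# Hypothetical setup, e.g. in __init__: invert the candidate-to-index map.
candidates, self.candid2indx = load_candidates(self.data_dir, self.task_id)
self.n_cand = len(candidates)
self.indx2candid = dict((index, cand) for cand, index in self.candid2indx.items())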
def train(self):
trainS, trainQ, trainA = vectorize_data(
self.trainData, self.word_idx, self.sentence_size, self.batch_size, self.n_cand, self.memory_size)
valS, valQ, valA = vectorize_data(
self.valData, self.word_idx, self.sentence_size, self.batch_size, self.n_cand, self.memory_size)
n_train = len(trainS)
n_val = len(valS)
print("Training Size", n_train)
print("Validation Size", n_val)
tf.set_random_seed(self.random_state)
batches = zip(range(0, n_train - self.batch_size, self.batch_size),
range(self.batch_size, n_train, self.batch_size))
batches = [(start, end) for start, end in batches]
best_validation_accuracy = 0
for t in range(1, self.epochs + 1):
np.random.shuffle(batches)
total_cost = 0.0
for start, end in batches:
s = trainS[start:end]
q = trainQ[start:end]
a = trainA[start:end]
cost_t = self.model.batch_fit(s, q, a)
total_cost += cost_t
if t % self.evaluation_interval == 0:
train_preds = self.batch_predict(trainS, trainQ, n_train)
val_preds = self.batch_predict(valS, valQ, n_val)
train_acc = metrics.accuracy_score(
np.array(train_preds), trainA)
val_acc = metrics.accuracy_score(val_preds, valA)
print('-----------------------')
print('Epoch', t)
print('Total Cost:', total_cost)
print('Training Accuracy:', train_acc)
print('Validation Accuracy:', val_acc)
print('-----------------------')
            # write summary (note: this creates new summary ops each evaluation; see the leaner sketch after this function)
train_acc_summary = tf.summary.scalar(
'task_' + str(self.task_id) + '/' + 'train_acc', tf.constant((train_acc), dtype=tf.float32))
val_acc_summary = tf.summary.scalar(
'task_' + str(self.task_id) + '/' + 'val_acc', tf.constant((val_acc), dtype=tf.float32))
merged_summary = tf.summary.merge(
[train_acc_summary, val_acc_summary])
summary_str = self.sess.run(merged_summary)
self.summary_writer.add_summary(summary_str, t)
self.summary_writer.flush()
if val_acc > best_validation_accuracy:
best_validation_accuracy = val_acc
self.saver.save(self.sess, self.model_dir +
'model.ckpt', global_step=t)
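A leaner alternative for the summary writing above (a sketch, not the original code): because tf.summary.scalar and tf.constant add new ops to the graph on every call, building the summary once with placeholders and feeding the Python floats keeps the graph from growing each evaluation interval. The placeholder names are assumptions.

# Build once, e.g. in __init__:
self.train_acc_ph = tf.placeholder(tf.float32, name='train_acc_ph')
self.val_acc_ph = tf.placeholder(tf.float32, name='val_acc_ph')
self.merged_summary = tf.summary.merge([
    tf.summary.scalar('task_' + str(self.task_id) + '/train_acc', self.train_acc_ph),
    tf.summary.scalar('task_' + str(self.task_id) + '/val_acc', self.val_acc_ph),
])

# Then, inside the evaluation block:
summary_str = self.sess.run(self.merged_summary,
                            feed_dict={self.train_acc_ph: train_acc,
                                       self.val_acc_ph: val_acc})
self.summary_writer.add_summary(summary_str, t)
self.summary_writer.flush()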
def train(self):
trainP, trainS, trainQ, trainA = vectorize_data(self.trainData, self.word_idx, self.sentence_size, self.batch_size, self.n_cand, self.memory_size)
valP, valS, valQ, valA = vectorize_data(self.valData, self.word_idx, self.sentence_size, self.batch_size, self.n_cand, self.memory_size)
n_train = len(trainS)
n_val = len(valS)
print("Training Size", n_train)
print("Validation Size", n_val)
tf.set_random_seed(self.random_state)
batches = zip(range(0, n_train-self.batch_size, self.batch_size), range(self.batch_size, n_train, self.batch_size))
batches = [(start, end) for start, end in batches]
    best_validation_accuracy = 0
for t in range(1, self.epochs+1):
print('Epoch', t)
np.random.shuffle(batches)
total_cost = 0.0
for start, end in batches:
p = trainP[start:end]
s = trainS[start:end]
q = trainQ[start:end]
a = trainA[start:end]
cost_t = self.model.batch_fit(p, s, q, a)
total_cost += cost_t
if t % self.evaluation_interval == 0:
            train_preds = self.batch_predict(trainP, trainS, trainQ, n_train)
            val_preds = self.batch_predict(valP, valS, valQ, n_val)
train_acc = metrics.accuracy_score(np.array(train_preds), trainA)
val_acc = metrics.accuracy_score(val_preds, valA)
print('-----------------------')
print('Epoch', t)
print('Total Cost:', total_cost)
print('Training Accuracy:', train_acc)
print('Validation Accuracy:', val_acc)
print('-----------------------')
# write summary
# train_acc_summary = tf.scalar_summary('task_' + str(self.task_id) + '/' + 'train_acc', tf.constant((train_acc), dtype=tf.float32))
# val_acc_summary = tf.scalar_summary('task_' + str(self.task_id) + '/' + 'val_acc', tf.constant((val_acc), dtype=tf.float32))
# merged_summary = tf.merge_summary([train_acc_summary, val_acc_summary])
train_acc_summary = tf.summary.scalar('task_' + str(self.task_id) + '/' + 'train_acc', tf.constant((train_acc), dtype=tf.float32))
val_acc_summary = tf.summary.scalar('task_' + str(self.task_id) + '/' + 'val_acc', tf.constant((val_acc), dtype=tf.float32))
merged_summary = tf.summary.merge([train_acc_summary, val_acc_summary])
summary_str = self.sess.run(merged_summary)
self.summary_writer.add_summary(summary_str, t)
self.summary_writer.flush()
if val_acc > best_validation_accuracy:
                best_validation_accuracy = val_acc
                self.saver.save(self.sess, self.model_dir + 'model.ckpt', global_step=t)
def train(self):
trainS, trainQ, trainA = vectorize_data(self.trainData, self.word_idx, self.sentence_size, self.batch_size, self.n_cand, self.memory_size)
valS, valQ, valA = vectorize_data(self.valData, self.word_idx, self.sentence_size, self.batch_size, self.n_cand, self.memory_size)
n_train = len(trainS)
n_val = len(valS)
print("Training Size",n_train)
print("Validation Size", n_val)
tf.set_random_seed(self.random_state)
batches = zip(range(0, n_train-self.batch_size, self.batch_size), range(self.batch_size, n_train, self.batch_size))
batches = [(start, end) for start, end in batches]
    best_validation_accuracy = 0
for t in range(1, self.epochs+1):
print('Epoch', t)
np.random.shuffle(batches)
total_cost = 0.0
for start, end in batches:
s = trainS[start:end]
q = trainQ[start:end]
a = trainA[start:end]
cost_t = self.model.batch_fit(s, q, a)
total_cost += cost_t
if t % self.evaluation_interval == 0:
            train_preds = self.batch_predict(trainS, trainQ, n_train)
            val_preds = self.batch_predict(valS, valQ, n_val)
train_acc = metrics.accuracy_score(np.array(train_preds), trainA)
val_acc = metrics.accuracy_score(val_preds, valA)
print('-----------------------')
print('Epoch', t)
print('Total Cost:', total_cost)
print('Training Accuracy:', train_acc)
print('Validation Accuracy:', val_acc)
print('-----------------------')
# write summary
# train_acc_summary = tf.scalar_summary('task_' + str(self.task_id) + '/' + 'train_acc', tf.constant((train_acc), dtype=tf.float32))
# val_acc_summary = tf.scalar_summary('task_' + str(self.task_id) + '/' + 'val_acc', tf.constant((val_acc), dtype=tf.float32))
# merged_summary = tf.merge_summary([train_acc_summary, val_acc_summary])
train_acc_summary = tf.summary.scalar('task_' + str(self.task_id) + '/' + 'train_acc', tf.constant((train_acc), dtype=tf.float32))
val_acc_summary = tf.summary.scalar('task_' + str(self.task_id) + '/' + 'val_acc', tf.constant((val_acc), dtype=tf.float32))
merged_summary = tf.summary.merge([train_acc_summary, val_acc_summary])
summary_str = self.sess.run(merged_summary)
self.summary_writer.add_summary(summary_str, t)
self.summary_writer.flush()
if val_acc > best_validation_accuracy:
                best_validation_accuracy = val_acc
                self.saver.save(self.sess, self.model_dir + 'model.ckpt', global_step=t)
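For context, these train/test/interactive methods are usually wired together by a small driver along these lines; the ChatBot class name and the --train flag are hypothetical, since the surrounding code is not shown on this page:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--train', action='store_true', help='train instead of test')
    args = parser.parse_args()
    bot = ChatBot()  # hypothetical wrapper owning sess, model, saver, and data
    if args.train:
        bot.train()
    else:
        bot.test()  # drops into interactive() when isInteractive is set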