def validation_check():
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    X, Y = load_data(mode="val")

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name from the checkpoint file
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]

            # Inference
            if not os.path.exists(hp.results):
                os.mkdir(hp.results)
            with open(os.path.join(hp.results, "validation_results.txt"), 'a') as fout:
                expected, predicted = [], []
                for step in range(len(X) // hp.batch_size):
                    x = X[step * hp.batch_size: (step + 1) * hp.batch_size]
                    y = Y[step * hp.batch_size: (step + 1) * hp.batch_size]

                    # Predict intensities
                    logits = sess.run(g.logits, {g.x: x})
                    expected.extend(list(y))
                    predicted.extend(list(logits))

                # Get the Spearman rank correlation coefficient
                score, _ = spearmanr(expected, predicted)
                fout.write("{}\t{}\n".format(mname, score))
def test():
    x, y = load_data(type="test")
    g = Graph(is_training=False)
    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]

            if not os.path.exists('results'):
                os.mkdir('results')
            fout = 'results/{}.txt'.format(mname)

            import copy
            # Fill in the blanks one cell at a time, always committing the
            # single most confident prediction per puzzle on each pass.
            _preds = copy.copy(x)
            while True:
                istarget, probs, preds = sess.run([g.istarget, g.probs, g.preds],
                                                  {g.x: _preds, g.y: y})
                probs = probs.astype(np.float32)
                preds = preds.astype(np.float32)

                probs *= istarget  # (N, 9, 9)
                preds *= istarget  # (N, 9, 9)

                probs = np.reshape(probs, (-1, 9 * 9))  # (N, 81)
                preds = np.reshape(preds, (-1, 9 * 9))  # (N, 81)
                _preds = np.reshape(_preds, (-1, 9 * 9))

                maxprob_ids = np.argmax(probs, axis=1)  # (N,) <- blanks of the most probable prediction
                maxprobs = np.max(probs, axis=1, keepdims=False)
                for j, (maxprob_id, maxprob) in enumerate(zip(maxprob_ids, maxprobs)):
                    if maxprob != 0:
                        _preds[j, maxprob_id] = preds[j, maxprob_id]
                _preds = np.reshape(_preds, (-1, 9, 9))

                _preds = np.where(x == 0, _preds, y)  # fill in the non-blanks with the given numbers
                if np.count_nonzero(_preds) == _preds.size:
                    break

            write_to_file(x.astype(np.int32), y, _preds.astype(np.int32), fout)
def eval():
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    x, y = load_eval_data()
    char2idx, idx2char = load_vocab()

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session() as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]

            # Speech to text
            if not os.path.exists('samples'):
                os.mkdir('samples')
            with codecs.open('samples/{}.txt'.format(mname), 'w', 'utf-8') as fout:
                # Autoregressive decoding: predict one timestep at a time,
                # feeding the predictions so far back in as decoder input
                preds = np.zeros((hp.batch_size, hp.max_len), np.int32)
                for j in range(hp.max_len):
                    _preds = sess.run(g.preds, {g.x: x, g.y: preds})
                    preds[:, j] = _preds[:, j]

                # Write ground truth vs. prediction to file
                # (rows of preds are already int32 index arrays, so decode them directly)
                for expected, got in zip(y, preds):
                    fout.write("Expected: {}\n".format(expected.split("S")[0]))
                    fout.write("Got     : {}\n\n".format("".join(idx2char[idx] for idx in got).split("S")[0]))
                    fout.flush()
def eval(mode):
    '''Computes the Spearman rank-order correlation coefficient.

    Args:
      mode: A string. Either `val` or `test`.
    '''
    # Set save directory
    savedir = hp.valdir if mode == "val" else hp.testdir

    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    X, Y = load_data(mode=mode)
    nucl2idx, idx2nucl = load_vocab()

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]

            # Inference
            if not os.path.exists(savedir):
                os.mkdir(savedir)
            with open("{}/{}".format(savedir, mname), 'w') as fout:
                fout.write("{}\t{}\t{}\n".format("probe", "expected intensity", "predicted intensity"))
                expected, got = [], []
                for step in range(len(X) // hp.batch_size):
                    x = X[step * hp.batch_size: (step + 1) * hp.batch_size]
                    y = Y[step * hp.batch_size: (step + 1) * hp.batch_size]

                    # Predict intensities
                    logits = sess.run(g.logits, {g.x: x})
                    for xx, yy, ll in zip(x, y, logits):  # sequence-wise
                        fout.write("{}\t{}\t{}\n".format("".join(idx2nucl[idx] for idx in xx), yy, ll))
                        expected.append(yy)
                        got.append(ll)

                # Spearman rank correlation coefficient
                score, _ = spearmanr(expected, got)
                fout.write("Spearman rank correlation coefficient: " + str(score))
def eval():
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    X, Y = load_data(mode="test")
    nucl2idx, idx2nucl = load_vocab()

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]

            # Inference
            if not os.path.exists(hp.results):
                os.mkdir(hp.results)
            with open(os.path.join(hp.results, mname), 'w') as fout:
                fout.write("{}\t{}\t{}\n".format("probe", "expected intensity", "predicted intensity"))
                expected, predicted = [], []
                for step in range(len(X) // hp.batch_size):
                    x = X[step * hp.batch_size: (step + 1) * hp.batch_size]
                    y = Y[step * hp.batch_size: (step + 1) * hp.batch_size]

                    # Predict intensities
                    logits = sess.run(g.logits, {g.x: x})
                    expected.extend(list(y))
                    predicted.extend(list(logits))
                    for xx, yy, ll in zip(x, y, logits):  # sequence-wise
                        fout.write("{}\t{}\t{}\n".format("".join(idx2nucl[idx] for idx in xx), yy, ll))

                # Get Spearman coefficient
                score, _ = spearmanr(expected, predicted)
                fout.write("Spearman Coefficient: {}\n".format(score))

            # Plot where the top 100 probes by expected intensity land
            # when the probes are re-ranked by predicted intensity
            expected_predicted = sorted(zip(expected, predicted), key=lambda x: float(x[0]), reverse=True)
            expected_predicted = [list(each) + [int(i < 100)] for i, each in enumerate(expected_predicted)]
            expected_predicted = sorted(expected_predicted, key=lambda x: float(x[1]), reverse=True)
            predicted_ranks = np.array([each[-1] for each in expected_predicted])

            # Render the ranks as a vertical binary strip
            axprops = dict(xticks=[], yticks=[])
            barprops = dict(aspect='auto', cmap=plt.cm.binary, interpolation='nearest')
            fig = plt.figure()
            predicted_ranks.shape = (len(predicted_ranks), 1)
            ax = fig.add_axes([0, 0, .5, 1], **axprops)
            ax.imshow(predicted_ranks, **barprops)
            fig.savefig('fig/rank.png')
def eval():
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    X, Y = load_data(mode="test")  # texts
    char2idx, idx2char = load_vocab()

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]

            # Inference
            if not os.path.exists(hp.savedir):
                os.mkdir(hp.savedir)
            with open("{}/{}".format(hp.savedir, mname), 'w') as fout:
                results = []
                baseline_results = []
                for step in range(len(X) // hp.batch_size):
                    x = X[step * hp.batch_size: (step + 1) * hp.batch_size]
                    y = Y[step * hp.batch_size: (step + 1) * hp.batch_size]

                    # Predict space insertions
                    preds = sess.run(g.preds, {g.x: x})
                    for xx, yy, pp in zip(x, y, preds):  # sentence-wise
                        expected = ''
                        got = ''
                        for xxx, yyy, ppp in zip(xx, yy, pp):  # character-wise
                            if xxx == 0:
                                break
                            got += idx2char.get(xxx, "*")
                            expected += idx2char.get(xxx, "*")
                            if ppp == 1: got += " "
                            if yyy == 1: expected += " "

                            # Prediction results
                            results.append(1 if ppp == yyy else 0)

                            # Baseline results: always predict "no space"
                            baseline_results.append(1 if yyy == 0 else 0)

                        fout.write("Expected: " + expected + "\n")
                        fout.write("Got     : " + got + "\n\n")

                fout.write("Final Accuracy = %d/%d=%.4f\n" %
                           (sum(results), len(results), float(sum(results)) / len(results)))
                fout.write("Baseline Accuracy = %d/%d=%.4f" %
                           (sum(baseline_results), len(baseline_results),
                            float(sum(baseline_results)) / len(baseline_results)))
def eval_it():
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    X = load_eval_data()  # texts
    char2idx, idx2char = load_vocab()

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]

            # Autoregressive decoding of the mel spectrogram,
            # one reduced frame group at a time
            timesteps = 100  # adjust this number as you want
            outputs1 = np.zeros((hp.num_samples, timesteps, hp.n_mels * hp.r), np.float32)
            for j in range(timesteps):
                _outputs1 = sess.run(g.outputs1, {g.x: X, g.y: outputs1})
                outputs1[:, j, :] = _outputs1[:, j, :]
            outputs2 = sess.run(g.outputs2, {g.outputs1: outputs1})

            # Generate wav files
            if not os.path.exists(hp.outputdir):
                os.mkdir(hp.outputdir)
            with codecs.open(hp.outputdir + '/text.txt', 'w', 'utf-8') as fout:
                for i, (x, s) in enumerate(zip(X, outputs2)):
                    # Write text (x is a 1-D array of character indices; 0 is padding)
                    fout.write(str(i) + "\t" + "".join(idx2char[idx] for idx in x if idx != 0) + "\n")

                    s = restore_shape(s, hp.win_length // hp.hop_length, hp.r)

                    # Generate a wav file
                    if hp.use_log_magnitude:
                        audio = spectrogram2wav(np.power(np.e, s) ** hp.power)
                    else:
                        s = np.where(s < 0, 0, s)
                        audio = spectrogram2wav(s ** hp.power)
                    write(hp.outputdir + "/{}_{}.wav".format(mname, i), hp.sr, audio)
def eval():
    # Load graph
    g = Graph(mode="inference")
    print("Graph Loaded")

    with tf.Session() as sess:
        # Initialize variables
        tf.sg_init(sess)

        # Restore parameters
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint('asset/train'))
        print("Restored!")

        # Get model name
        mname = open('asset/train/checkpoint', 'r').read().split('"')[1]

        # Load data
        X, Sources, Targets = load_test_data()
        char2idx, idx2char = load_vocab()

        with codecs.open(mname, "w", "utf-8") as fout:
            list_of_refs, hypotheses = [], []
            for i in range(len(X) // Hp.batch_size):
                # Get mini-batches
                x = X[i * Hp.batch_size: (i + 1) * Hp.batch_size]
                sources = Sources[i * Hp.batch_size: (i + 1) * Hp.batch_size]
                targets = Targets[i * Hp.batch_size: (i + 1) * Hp.batch_size]

                # Autoregressive inference: predict the next character and
                # feed it back in as decoder input
                preds_prev = np.zeros((Hp.batch_size, Hp.maxlen), np.int32)
                preds = np.zeros((Hp.batch_size, Hp.maxlen), np.int32)
                for j in range(Hp.maxlen):
                    outs = sess.run(g.preds, {g.x: x, g.y_src: preds_prev})
                    if j < Hp.maxlen - 1:
                        preds_prev[:, j + 1] = outs[:, j]
                    preds[:, j] = outs[:, j]

                # Write to file
                for source, target, pred in zip(sources, targets, preds):  # sentence-wise
                    got = "".join(idx2char[idx] for idx in pred).split(u"?")[0]  # truncate at the EOS marker
                    fout.write("- source: " + source + "\n")
                    fout.write("- expected: " + target + "\n")
                    fout.write("- got: " + got + "\n\n")
                    fout.flush()

                    # Collect references and hypotheses for the BLEU score
                    ref = target.split()
                    hypothesis = got.split()
                    if len(ref) > 2:
                        list_of_refs.append([ref])
                        hypotheses.append(hypothesis)

            # Get BLEU score
            score = corpus_bleu(list_of_refs, hypotheses)
            fout.write("Bleu Score = " + str(100 * score))
def eval():
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    X = load_eval_data()  # texts
    char2idx, idx2char = load_vocab()

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]

            # Autoregressive decoding of the mel spectrogram,
            # one reduced frame group at a time
            timesteps = 100  # adjust this number as you want
            outputs1 = np.zeros((hp.num_samples, timesteps, hp.n_mels * hp.r), np.float32)
            for j in range(timesteps):
                _outputs1 = sess.run(g.outputs1, {g.x: X, g.y: outputs1})
                outputs1[:, j, :] = _outputs1[:, j, :]
            outputs2 = sess.run(g.outputs2, {g.outputs1: outputs1})

            # Generate wav files
            if not os.path.exists(hp.outputdir):
                os.mkdir(hp.outputdir)
            with codecs.open(hp.outputdir + '/text.txt', 'w', 'utf-8') as fout:
                for i, (x, s) in enumerate(zip(X, outputs2)):
                    # Write text (x is a 1-D array of character indices; 0 is padding)
                    fout.write(str(i) + "\t" + "".join(idx2char[idx] for idx in x if idx != 0) + "\n")

                    s = restore_shape(s, hp.win_length // hp.hop_length, hp.r)

                    # Generate a wav file
                    if hp.use_log_magnitude:
                        audio = spectrogram2wav(np.power(np.e, s) ** hp.power)
                    else:
                        s = np.where(s < 0, 0, s)
                        audio = spectrogram2wav(s ** hp.power)
                    write(hp.outputdir + "/{}_{}.wav".format(mname, i), hp.sr, audio)
def eval():
    # Load graph
    g = Graph(is_training=False)

    with tf.Session(graph=g.graph) as sess:
        # Restore parameters
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
        print("Restored!")

        # Get model name
        mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]
        print(mname)

        # Load data
        X, Sources, Targets = load_test_data()
        char2idx, idx2char = load_vocab()

        with codecs.open(hp.savedir + "/" + mname, "w", "utf-8") as fout:
            list_of_refs, hypotheses = [], []
            for i in range(len(X) // hp.batch_size):
                # Get mini-batches
                x = X[i * hp.batch_size: (i + 1) * hp.batch_size]
                sources = Sources[i * hp.batch_size: (i + 1) * hp.batch_size]
                targets = Targets[i * hp.batch_size: (i + 1) * hp.batch_size]

                # Autoregressive inference; the first decoder input is
                # seeded with id 2, presumably the start token
                preds_prev = np.zeros((hp.batch_size, hp.maxlen), np.int32)
                preds_prev[:, 0] = 2
                preds = np.zeros((hp.batch_size, hp.maxlen), np.int32)
                for j in range(hp.maxlen):
                    # Predict the next character and feed it back in
                    outs = sess.run(g.preds, {g.x: x, g.decoder_inputs: preds_prev})
                    if j < hp.maxlen - 1:
                        preds_prev[:, j + 1] = outs[:, j]
                    preds[:, j] = outs[:, j]

                # Write to file
                for source, target, pred in zip(sources, targets, preds):  # sentence-wise
                    got = "".join(idx2char[idx] for idx in pred).split(u"?")[0]  # truncate at the EOS marker
                    fout.write("- source: " + source + "\n")
                    fout.write("- expected: " + target + "\n")
                    fout.write("- got: " + got + "\n\n")
                    fout.flush()

                    # Collect references and hypotheses for the BLEU score
                    ref = target.split()
                    hypothesis = got.split()
                    if len(ref) > 3 and len(hypothesis) > 3:
                        list_of_refs.append([ref])
                        hypotheses.append(hypothesis)

            # Get BLEU score
            score = corpus_bleu(list_of_refs, hypotheses)
            fout.write("Bleu Score = " + str(100 * score))
eval.py from the project Transformer-in-generating-dialogue (author: EternalFeather)
def eval():
    # Load graph
    g = Graph(is_training=False)
    print("MSG : Graph loaded!")

    # Load data
    X, Sources, Targets = load_data('test')
    en2idx, idx2en = load_vocab('en.vocab.tsv')
    de2idx, idx2de = load_vocab('de.vocab.tsv')

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Load the pre-trained model
            sv.saver.restore(sess, tf.train.latest_checkpoint(pm.checkpoint))
            print("MSG : Restore Model!")

            # Get model name
            mname = open(pm.checkpoint + '/checkpoint', 'r').read().split('"')[1]

            if not os.path.exists('Results'):
                os.mkdir('Results')
            with codecs.open("Results/" + mname, 'w', 'utf-8') as f:
                list_of_refs, predict = [], []
                for i in range(len(X) // pm.batch_size):
                    # Get a batch
                    x = X[i * pm.batch_size: (i + 1) * pm.batch_size]
                    sources = Sources[i * pm.batch_size: (i + 1) * pm.batch_size]
                    targets = Targets[i * pm.batch_size: (i + 1) * pm.batch_size]

                    # Autoregressive inference
                    preds = np.zeros((pm.batch_size, pm.maxlen), dtype=np.int32)
                    for j in range(pm.maxlen):
                        _preds = sess.run(g.preds, feed_dict={g.inpt: x, g.outpt: preds})
                        preds[:, j] = _preds[:, j]

                    # Write to file
                    for source, target, pred in zip(sources, targets, preds):
                        got = " ".join(idx2de[idx] for idx in pred).split("<EOS>")[0].strip()
                        f.write("- Source: {}\n".format(source))
                        f.write("- Ground Truth: {}\n".format(target))
                        f.write("- Predict: {}\n\n".format(got))
                        f.flush()

                        # Collect references and predictions for the BLEU score
                        ref = target.split()
                        prediction = got.split()
                        if len(ref) > pm.word_limit_lower and len(prediction) > pm.word_limit_lower:
                            list_of_refs.append([ref])
                            predict.append(prediction)

                score = corpus_bleu(list_of_refs, predict)
                f.write("Bleu Score = " + str(100 * score))
def eval():
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    X, Sources, Targets = load_test_data()
    de2idx, idx2de = load_de_vocab()
    en2idx, idx2en = load_en_vocab()
    # X, Sources, Targets = X[:33], Sources[:33], Targets[:33]

    # Start session
    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]

            # Inference
            if not os.path.exists('results'):
                os.mkdir('results')
            with codecs.open("results/" + mname, "w", "utf-8") as fout:
                list_of_refs, hypotheses = [], []
                for i in range(len(X) // hp.batch_size):
                    # Get mini-batches
                    x = X[i * hp.batch_size: (i + 1) * hp.batch_size]
                    sources = Sources[i * hp.batch_size: (i + 1) * hp.batch_size]
                    targets = Targets[i * hp.batch_size: (i + 1) * hp.batch_size]

                    # Autoregressive inference
                    preds = np.zeros((hp.batch_size, hp.maxlen), np.int32)
                    for j in range(hp.maxlen):
                        _preds = sess.run(g.preds, {g.x: x, g.y: preds})
                        preds[:, j] = _preds[:, j]

                    # Write to file
                    for source, target, pred in zip(sources, targets, preds):  # sentence-wise
                        got = " ".join(idx2en[idx] for idx in pred).split("</S>")[0].strip()
                        fout.write("- source: " + source + "\n")
                        fout.write("- expected: " + target + "\n")
                        fout.write("- got: " + got + "\n\n")
                        fout.flush()

                        # Collect references and hypotheses for the BLEU score
                        ref = target.split()
                        hypothesis = got.split()
                        if len(ref) > 3 and len(hypothesis) > 3:
                            list_of_refs.append([ref])
                            hypotheses.append(hypothesis)

                # Calculate BLEU score
                score = corpus_bleu(list_of_refs, hypotheses)
                fout.write("Bleu Score = " + str(100 * score))