def GMM_sample(mus, sigmas, mix_weights):
"""
First, sample according to the prior mixing probabilities
to choose the component density.
Second, draw a sample from that component density.
Inspired by implementation in `cle`
"""
chosen_component = \
T.argmax(
srng.multinomial(pvals=mix_weights),
axis=1)
selected_mus = mus[T.arange(mus.shape[0]), :, chosen_component]
selected_sigmas = sigmas[T.arange(sigmas.shape[0]), :, chosen_component]
sample = srng.normal(size=selected_mus.shape,
avg=0.,
std=1.)
sample *= selected_sigmas
sample += selected_mus
return sample, selected_mus, selected_sigmas, chosen_component
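The same two-step scheme, sketched in plain NumPy for reference (an illustrative analogue, not part of the original project; all shapes and the random generator are assumptions):

import numpy as np

rng = np.random.default_rng(0)
batch, dim, n_comp = 2, 3, 4
mus = rng.normal(size=(batch, dim, n_comp))
sigmas = np.abs(rng.normal(size=(batch, dim, n_comp)))
mix_weights = np.full((batch, n_comp), 1.0 / n_comp)

# Step 1: choose one component per sample from the mixing probabilities.
chosen = np.array([np.argmax(rng.multinomial(1, w)) for w in mix_weights])
# Step 2: reparameterised Gaussian draw from the chosen component.
sel_mu = mus[np.arange(batch), :, chosen]
sel_sigma = sigmas[np.arange(batch), :, chosen]
sample = rng.normal(size=sel_mu.shape) * sel_sigma + sel_mu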
Python argmax() example source code
def crossentropy(y_pred, y_true, void_labels, one_hot=False):
# Clip predictions
y_pred = T.clip(y_pred, _EPSILON, 1.0 - _EPSILON)
if one_hot:
y_true = T.argmax(y_true, axis=1)
# Create mask
mask = T.ones_like(y_true, dtype=_FLOATX)
for el in void_labels:
mask = T.set_subtensor(mask[T.eq(y_true, el).nonzero()], 0.)
# Modify y_true temporarily
y_true_tmp = y_true * mask
y_true_tmp = y_true_tmp.astype('int32')
# Compute cross-entropy
loss = T.nnet.categorical_crossentropy(y_pred, y_true_tmp)
# Compute masked mean loss
loss *= mask
loss = T.sum(loss) / T.sum(mask)
return loss
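The void-label masking can be checked with a small NumPy sketch (illustrative only; the label values are assumed):

import numpy as np

y_true = np.array([0, 2, 255, 1])              # 255 marks a "void" pixel
void_labels = [255]
mask = np.ones_like(y_true, dtype='float32')
for el in void_labels:
    mask[y_true == el] = 0.                    # zero out void positions
y_true_tmp = (y_true * mask).astype('int32')   # void entries collapse to class 0
# per-pixel losses are then multiplied by `mask` and averaged over mask.sum()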
def eval_classificationT( self, y, p_y):
"""Calculate the error (100 - accuracy) of the DNN in the case of classification.
:type y: vector
:param y: vector (r,) of labels
:type p_y: matrix
:param p_y: matrix of network outputs. Each row is a vector of probabilities (one probability per class)
"""
y_ = T.argmax(p_y, axis = 1)
# Accuracy
error = 1 - T.mean(T.eq(y_, y) * 1.)
error = error * 100.
return error
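The same error computation in NumPy, as a quick sanity check (toy values, not from the original code):

import numpy as np

p_y = np.array([[0.7, 0.2, 0.1],
                [0.1, 0.8, 0.1],
                [0.3, 0.3, 0.4]])
y = np.array([0, 1, 2])
y_hat = np.argmax(p_y, axis=1)                 # predicted classes: [0, 1, 2]
error = (1.0 - np.mean(y_hat == y)) * 100.0    # 0.0 here: all three are correct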
Source file: roc_auc.py — project: deep-mil-for-whole-mammogram-classification — author: wentaozhu
def on_epoch_end(self, epoch, logs={}):
if epoch % self.interval == 0:
y_pred = self.model.predict(self.X_val, verbose=0)
#print(np.sum(y_pred[:,1]))
#y_true = np.argmax(self.y_val, axis=1)
#y_pred = np.argmax(y_pred, axis=1)
#print(y_true.shape, y_pred.shape)
if self.mymil:
score = roc_auc_score(self.y_val.max(axis=1), y_pred.max(axis=1))
else: score = roc_auc_score(self.y_val[:,1], y_pred[:,1])
print("interval evaluation - epoch: {:d} - auc: {:.2f}".format(epoch, score))
if score > self.auc:
self.auc = score
for f in os.listdir('./'):
if f.startswith(self.filepath+'auc'):
os.remove(f)
self.model.save(self.filepath+'auc'+str(score)+'ep'+str(epoch)+'.hdf5')
def on_epoch_end(self, epoch, logs={}):
if epoch % self.interval == 0:
y_pred = self.model.predict(self.X_val, verbose=0)
if self.mymil:
y_true = self.y_val.max(axis=1)
y_score = y_pred.max(axis=1)>0.5
else:
y_true = np.argmax(self.y_val, axis=1)
y_score = np.argmax(y_pred, axis=1)
#print(type(y_true), y_true.shape, type(y_score), y_score.shape)
#print(y_score, y_true)
TP = np.sum(y_true[y_score==1]==1)*1. #/ sum(y_true)
FP = np.sum(y_true[y_score==1]==0)*1. #/ (y_true.shape[0]-sum(y_true))
prec = TP / (TP+FP+1e-6)
print("interval evaluation - epoch: {:d} - prec: {:.2f}".format(epoch, prec))
if prec > self.prec:
self.prec = prec
for f in os.listdir('./'):
if f.startswith(self.filepath+'prec'):
os.remove(f)
self.model.save(self.filepath+'prec'+str(prec)+'ep'+str(epoch)+'.hdf5')
def on_epoch_end(self, epoch, logs={}):
if epoch % self.interval == 0:
y_pred = self.model.predict(self.X_val, verbose=0)
if self.mymil:
y_true = self.y_val.max(axis=1)
y_score = y_pred.max(axis=1)>0.5
else:
y_true = np.argmax(self.y_val, axis=1)
y_score = np.argmax(y_pred, axis=1)
#print(type(y_true), y_true.shape, type(y_score), y_score.shape)
TP = np.sum(y_true[y_score==1]==1)*1. #/ sum(y_true)
FN = np.sum(y_true[y_score==0]==1)*1. #/ sum(y_true)
reca = TP / (TP+FN+1e-6)
print("interval evaluation - epoch: {:d} - reca: {:.2f}".format(epoch, reca))
if reca > self.reca:
self.reca = reca
for f in os.listdir('./'):
if f.startswith(self.filepath+'reca'):
os.remove(f)
self.model.save(self.filepath+'reca'+str(reca)+'ep'+str(epoch)+'.hdf5')
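The two callbacks above reduce precision and recall to plain TP/FP/FN counts over argmax (or thresholded) predictions; a small NumPy sketch with toy labels:

import numpy as np

y_true = np.array([1, 0, 1, 1, 0])
y_score = np.array([1, 1, 0, 1, 0])            # hard 0/1 predictions
TP = np.sum(y_true[y_score == 1] == 1) * 1.
FP = np.sum(y_true[y_score == 1] == 0) * 1.
FN = np.sum(y_true[y_score == 0] == 1) * 1.
prec = TP / (TP + FP + 1e-6)                   # 2/3
reca = TP / (TP + FN + 1e-6)                   # 2/3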
def on_epoch_end(self, epoch, logs={}):
if epoch % self.interval == 0:
y_pred = self.model.predict(self.X_val, verbose=0)
#print(y_pred.shape)
if self.mymil:
y_true = self.y_val.max(axis=1)
y_score = y_pred.max(axis=1)>0.5
else:
y_true = np.argmax(self.y_val, axis=1)
y_score = y_pred[np.arange(len(y_true)), y_true] #y_pred[:, y_true] #np.argmax(y_pred, axis=1)
loss = -np.mean(np.log(y_score+1e-6)) #-np.mean(y_true*np.log(y_score+1e-6) + (1-y_true)*np.log(1-y_score+1e-6))
print('')
print("interval evaluation - epoch: {:d} - loss: {:.2f}".format(epoch, loss))
if loss < self.loss:
self.loss = loss
for f in os.listdir('./'):
if f.startswith(self.filepath+'loss'):
os.remove(f)
self.model.save(self.filepath+'loss'+str(loss)+'ep'+str(epoch)+'.hdf5')
def predict(self, tx, tm, twx, tcm, tgaze, tlemma = None, tpos = None):
i = 0
pys = []
while i < tx.shape[0]:
# j = min(self.x.shape[0], i + self.test_batch_size)
j = i + self.test_batch_size
s_x, s_m, s_wx, s_cm = tx[i: j], tm[i: j], twx[i: j], tcm[i: j]
s_gaze = tgaze[i: j] if self.use_gaze else None
s_lemma = tlemma[i: j] if self.use_lemma else None
s_pos = tpos[i: j] if self.use_pos else None
pys.append(self.test_fn(s_x, s_m, s_wx, s_cm, s_gaze, s_lemma, s_pos))
i = j
py = np.vstack(tuple(pys))
if self.use_crf:
return py.flatten()
else:
return py.argmax(axis = 1)
def applySoftMax( inputSample, inputSampleShape, numClasses, softmaxTemperature):
inputSampleReshaped = inputSample.dimshuffle(0, 2, 3, 4, 1)
inputSampleFlattened = inputSampleReshaped.flatten(1)
numClassifiedVoxels = inputSampleShape[2]*inputSampleShape[3]*inputSampleShape[4]
firstDimOfinputSample2d = inputSampleShape[0]*numClassifiedVoxels
inputSample2d = inputSampleFlattened.reshape((firstDimOfinputSample2d, numClasses))
# Predicted probability per class.
p_y_given_x_2d = T.nnet.softmax(inputSample2d/softmaxTemperature)
p_y_given_x_class = p_y_given_x_2d.reshape((inputSampleShape[0],
inputSampleShape[2],
inputSampleShape[3],
inputSampleShape[4],
inputSampleShape[1]))
p_y_given_x = p_y_given_x_class.dimshuffle(0,4,1,2,3)
y_pred = T.argmax(p_y_given_x, axis=1)
return ( p_y_given_x, y_pred )
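The reshape–softmax–reshape pattern used above can be illustrated on a tiny 5D volume in NumPy (shapes are assumptions; this is not the original code):

import numpy as np

b, c, d0, d1, d2 = 2, 3, 4, 4, 4                          # batch, classes, spatial dims
x = np.random.rand(b, c, d0, d1, d2)
x2d = np.moveaxis(x, 1, -1).reshape(-1, c)                # one row per classified voxel
e = np.exp(x2d - x2d.max(axis=1, keepdims=True))
p2d = e / e.sum(axis=1, keepdims=True)                    # row-wise softmax
p = np.moveaxis(p2d.reshape(b, d0, d1, d2, c), -1, 1)     # back to (b, c, d0, d1, d2)
y_pred = np.argmax(p, axis=1)                             # per-voxel class, (b, d0, d1, d2)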
# ----------------- Apply Bias to feat maps ---------------#
def max_oracle(scores,
y_truth):
n_classes = scores.shape[1]
t_range = T.arange(y_truth.shape[0])
# classification loss for any combination
losses = 1. - T.extra_ops.to_one_hot(y_truth, n_classes)
# get max score for each sample
y_star = T.argmax(scores + losses, axis=1)
# compute classification loss for batch
delta = losses[t_range, y_star].sum()
return y_star, delta
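max_oracle performs loss-augmented inference for a multiclass hinge loss: every wrong class gets its score raised by 1 before taking the argmax. A NumPy sketch with toy scores:

import numpy as np

scores = np.array([[2.0, 1.5, 0.1],
                   [0.2, 0.3, 2.0]])
y_truth = np.array([0, 2])
n_classes = scores.shape[1]
losses = 1.0 - np.eye(n_classes)[y_truth]                 # 0 for the true class, 1 otherwise
y_star = np.argmax(scores + losses, axis=1)               # loss-augmented prediction: [1, 2]
delta = losses[np.arange(len(y_truth)), y_star].sum()     # batch classification loss: 1.0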
def generative_sampling(self, seed, emb_data, sample_length):
fruit = theano.shared(value=seed)
def step(h_tm, y_tm):
h_t = self.activation(T.dot(emb_data[y_tm], self.W) +
T.dot(h_tm, self.U) + self.bh)
y_t = T.nnet.softmax(T.dot(h_t, self.V) + self.by)
y = T.argmax(y_t, axis=1)
return h_t, y[0]
[_, samples], _ = theano.scan(fn=step,
outputs_info=[self.h0, fruit],
n_steps=sample_length)
get_samples = theano.function(inputs=[],
outputs=samples)
return get_samples()
def __init__(self, input, n_in, n_out, verbose):
self.verbose = verbose
self.W = Weight((n_in, n_out))
self.b = Weight((n_out,), std=0)
self.p_y_given_x = T.nnet.softmax(
T.dot(input, self.W.val) + self.b.val)
self.y_pred = T.argmax(self.p_y_given_x, axis=1)
self.params = [self.W.val, self.b.val]
self.weight_type = ['W', 'b']
if self.verbose:
print('softmax layer with num_in: ' + str(n_in) +
' num_out: ' + str(n_out))
def score_metrics(out, target_var, weight_map, l2_loss=0):
_EPSILON=1e-8
out_flat = out.dimshuffle(1,0,2,3).flatten(ndim=2).dimshuffle(1,0)
target_flat = target_var.dimshuffle(1,0,2,3).flatten(ndim=1)
weight_flat = weight_map.dimshuffle(1,0,2,3).flatten(ndim=1)
prediction = lasagne.nonlinearities.softmax(out_flat)
prediction_binary = T.argmax(prediction, axis=1)
dice_score = (T.sum(T.eq(2, prediction_binary+target_flat))*2.0 /
(T.sum(prediction_binary) + T.sum(target_flat)))
loss = lasagne.objectives.categorical_crossentropy(T.clip(prediction,_EPSILON,1-_EPSILON), target_flat)
loss = loss * weight_flat
loss = loss.mean()
loss += l2_loss
accuracy = T.mean(T.eq(prediction_binary, target_flat),
dtype=theano.config.floatX)
return loss, accuracy, dice_score, target_flat, prediction, prediction_binary
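The `T.eq(2, prediction_binary + target_flat)` term counts positions where both prediction and target are 1, i.e. the intersection of the Dice score. In NumPy (binary masks assumed):

import numpy as np

pred = np.array([1, 0, 1, 1, 0])
target = np.array([1, 1, 1, 0, 0])
intersection = np.sum((pred + target) == 2)               # both equal 1
dice = 2.0 * intersection / (pred.sum() + target.sum())   # 2*2 / (3+3) = 0.667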
def __init__(self, name, x, y, n_in, n_out):
self.x= x
self.name = name
# weight matrix W (n_in, n_out)
self.W = theano.shared(
value=np.zeros((n_in, n_out), dtype=theano.config.floatX),
name='W',
borrow=True)
# bias vector b (n_out, )
self.b = theano.shared(
value=np.zeros((n_out,), dtype=theano.config.floatX),
name='b',
borrow=True)
# p(y|x, w, b)
self.p_y_given_x = T.nnet.softmax(T.dot(x, self.W) + self.b)
self.y_pred = T.argmax(self.p_y_given_x, axis=1)
self.negative_log_likelihood = -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
self.errors = T.mean(T.neq(self.y_pred, y))
# params
self.params = [self.W, self.b]
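The negative log-likelihood picks out the log-probability of the correct class for every row; the same indexing pattern in NumPy (toy values assumed):

import numpy as np

log_p = np.log(np.array([[0.7, 0.2, 0.1],
                         [0.1, 0.8, 0.1]]))
y = np.array([0, 1])
nll = -np.mean(log_p[np.arange(y.shape[0]), y])           # mean of -log p(correct class)
y_pred = np.argmax(log_p, axis=1)                         # [0, 1]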
def get_layer(self, x_in, C_in, ty_i): # op,
n_steps = C_in.shape[0]
def __logsumexp(x, axis=None):
xmax = x.max(axis=axis, keepdims=True)
xmax_ = x.max(axis=axis)
return xmax_ + T.log(T.exp(x - xmax).sum(axis=axis))
def __step(_C, _x):
#scores = T.dot( T.dot(_x, self._params['U']) + self._params['b'], self._params['v0'])
scores = T.dot(T.nnet.sigmoid(T.dot(_x, self._params[
'U1']) + T.dot(_C, self._params['U2']) + self._params['b']), self._params['v0'])
return scores.flatten()
y_out, _ = theano.scan(
__step, sequences=C_in, non_sequences=x_in, name='classification_layer', n_steps=n_steps)
norm_y = y_out.flatten() - __logsumexp(y_out)
f_lc_debug = theano.function(
[x_in, C_in, ty_i], [y_out, norm_y, norm_y[ty_i]])
return norm_y[ty_i], T.argmax(norm_y), f_lc_debug
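__logsumexp subtracts the maximum before exponentiating so the normaliser stays numerically stable; norm_y is then a vector of log-probabilities. A quick NumPy check with toy scores:

import numpy as np

y_out = np.array([3.0, 1.0, 0.2])
xmax = y_out.max()
logZ = xmax + np.log(np.exp(y_out - xmax).sum())          # stable log-sum-exp
norm_y = y_out - logZ                                     # log-probabilities
assert np.isclose(np.exp(norm_y).sum(), 1.0)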
def __init__(self, x, y, n_x, n_y):
# initialize with 0 the weights as a matrix of shape (n_in, n_out)
self.w = theano.shared(
value=numpy.zeros((n_x, n_y), dtype=theano.config.floatX),
name='w',
borrow=True
)
# initialize the biases b as a vector of n_out 0s
self.b = theano.shared(
value=numpy.zeros((n_y,), dtype=theano.config.floatX),
name='b',
borrow=True
)
self.params = [self.w, self.b]
# save x, y
self.x = x
self.y = y
# calculate
p_y_given_x = T.nnet.softmax(T.dot(self.x, self.w) + self.b)
# probability is maximal
y_pred = T.argmax(p_y_given_x, axis=1)
# error
self.error = T.mean(T.neq(y_pred, self.y))
# cost
self.cost = -T.mean(T.log(p_y_given_x)[T.arange(self.y.shape[0]), self.y])
def test_testset():
graph.change_flag(-1)
test_accuracy = []
for index in range(test_gen.max_index):
confusion_matrix = np.zeros((128, 10)).astype('int32')
for times in range(10):
testset = test_gen.get_minibatch(index) # re-sample again, same data, different preprocessing
test_output = test_func_output(testset[0])
test_output = int_to_onehot(test_output, 10)
confusion_matrix += test_output
testset = test_gen.get_minibatch(index)
test_batch_answer = np.argmax(confusion_matrix, axis=-1)
test_batch_accuracy = np.mean(np.equal(test_batch_answer, testset[1]))
test_accuracy.append(test_batch_accuracy)
hist.history['test_accuracy'].append(np.mean(np.asarray(test_accuracy)))
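test_testset votes over 10 stochastic forward passes: each prediction is accumulated as a one-hot row and the majority class is recovered with argmax. A NumPy sketch of the voting step (the `int_to_onehot` helper is assumed to behave like `np.eye(n)[labels]`):

import numpy as np

def int_to_onehot(labels, n_classes):
    # assumed behaviour of the helper used above
    return np.eye(n_classes, dtype='int32')[labels]

votes = np.zeros((3, 10), dtype='int32')                  # 3 examples, 10 classes
for _ in range(10):
    preds = np.random.randint(0, 10, size=3)              # stand-in for one stochastic pass
    votes += int_to_onehot(preds, 10)
answer = np.argmax(votes, axis=-1)                        # majority-vote class per example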
#================Train================#
def __init__(self, input, n_in, n_out):
self.W = theano.shared(
value=np.zeros(
(n_in, n_out),
dtype=theano.config.floatX
),
name='W',
borrow=True
)
self.b = theano.shared(
value=np.zeros(
(n_out,),
dtype=theano.config.floatX
),
name='b',
borrow=True
)
self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
self.y_pred = T.argmax(self.p_y_given_x, axis=1)
self.params = [self.W, self.b]
self.input = input
def __init__(self, input, n_in, n_out):
# initialize with 0 the weights W as a matrix of shape (n_in, n_out)
self.W = theano.shared(value=numpy.zeros((n_in, n_out),
dtype=theano.config.floatX),
name='W', borrow=True)
# initialize the baises b as a vector of n_out 0s
self.b = theano.shared(value=numpy.zeros((n_out,),
dtype=theano.config.floatX),
name='b', borrow=True)
# compute vector of class-membership probabilities in symbolic form
self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
self.p_y_given_x_printed = theano.printing.Print('p_y_given_x = ')(self.p_y_given_x)
#self.p_y_given_x_printed = self.p_y_given_x
# compute prediction as class whose probability is maximal in
# symbolic form
self.y_pred = T.argmax(self.p_y_given_x, axis=1)
# parameters of the model
self.params = [self.W, self.b]
def __init__(self, input, n_in, n_out):
self.W = theano.shared(
value=numpy.zeros(
(n_in, n_out),
dtype=theano.config.floatX
),
name='W',
borrow=True
)
self.b = theano.shared(
value=numpy.zeros(
(n_out,),
dtype=theano.config.floatX
),
name='b',
borrow=True
)
self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
self.y_pred = T.argmax(self.p_y_given_x, axis=1)
self.params = [self.W, self.b]
def __init__(self, input, n_in, n_out):
self.W = theano.shared(value=numpy.zeros((n_in, n_out),
dtype=theano.config.floatX),
name='W',
borrow=True)
self.b = theano.shared(value=numpy.zeros((n_out, ),
dtype=theano.config.floatX),
name='b',
borrow=True)
self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
self.y_pred = T.argmax(self.p_y_given_x, axis=1)
self.params = [self.W, self.b]
self.input = input
Source file: cnn_cascade_lasagne.py — project: Cascade-CNN-Face-Detection — author: gogolgrind
def __build_loss_train__fn__(self):
# create loss function
prediction = layers.get_output(self.net)
loss = objectives.categorical_crossentropy(prediction, self.__target_var__)
loss = loss.mean() + 1e-4 * regularization.regularize_network_params(self.net, regularization.l2)
val_acc = T.mean(T.eq(T.argmax(prediction, axis=1), self.__target_var__),dtype=theano.config.floatX)
# create parameter update expressions
params = layers.get_all_params(self.net, trainable=True)
self.eta = theano.shared(sp.array(sp.float32(0.05), dtype=sp.float32))
update_rule = updates.nesterov_momentum(loss, params, learning_rate=self.eta,
momentum=0.9)
# compile training function that updates parameters and returns training loss
self.__train_fn__ = theano.function([self.__input_var__,self.__target_var__], loss, updates=update_rule)
self.__predict_fn__ = theano.function([self.__input_var__], layers.get_output(self.net,deterministic=True))
self.__val_fn__ = theano.function([self.__input_var__,self.__target_var__], [loss,val_acc])
def __init__(self,n_input,n_output,x):
self.n_input=n_input
self.n_output=n_output
self.x=x.reshape([-1,x.shape[-1]])
init_W=np.asarray(np.random.uniform(low=-np.sqrt(1./n_input),
high=np.sqrt(1./n_input),
size=(n_input,n_output)),dtype=theano.config.floatX)
init_b=np.zeros((n_output),dtype=theano.config.floatX)
self.W=theano.shared(value=init_W,name='output_W',borrow=True)
self.b=theano.shared(value=init_b,name='output_b',borrow=True)
self.params=[self.W,self.b]
self.activation=T.nnet.softmax(T.dot(self.x,self.W)+self.b)
self.predict=T.argmax(self.activation,axis=-1)
def build(self):
# correct word probability (b,1)
c_o_t = T.exp(T.sum(self.W[self.y]*self.x,axis=-1) + self.b[self.y])
# negative word probability (b,k)
n_o_t = T.exp(T.sum(self.W[self.y_neg]*self.x.dimshuffle(0,'x',1),axis=-1)+ self.b[self.y_neg])
# positive probability
c_o_p = c_o_t / (c_o_t + self.k * self.q_w[self.y])
# negative probability (k,1)
n_o_p = self.q_w[self.y_neg] / (n_o_t + self.k * self.q_w[self.y_neg])
# cost for each y in nce
self.activation = -T.sum((T.log(c_o_p) + T.sum(T.log(n_o_p),axis=-1))*self.y_mask)/(T.sum(self.y_mask)*(self.k+1))
self.probability = T.nnet.softmax(T.dot(self.x,self.W.T) + self.b)
self.predict = T.argmax(self.probability, axis=-1)
def build(self):
# blackout version output probability
# correct word probability (b,1)
c_o_t = T.exp(T.sum(self.W[self.y] * self.x, axis=-1) + self.b[self.y])
# negative word probability (b,k)
n_o_t = T.exp(T.sum(self.W[self.y_neg] * self.x.dimshuffle(0, 'x', 1), axis=-1) + self.b[self.y_neg])
# sample set probability
t_o = (self.q_w[self.y] * c_o_t) + T.sum(self.q_w[self.y_neg] * n_o_t,axis=-1)
# positive probability (b,1)
c_o_p = self.q_w[self.y] * c_o_t / t_o
# negative probability (b,k)
n_o_p = self.q_w[self.y_neg] * n_o_t / t_o.dimshuffle(0,'x')
self.sumed=t_o
self.other=T.log(c_o_p) + T.sum(T.log(1. - n_o_p),axis=-1)
# cost for each y in blackout
self.activation = -T.sum((T.log(c_o_p) + T.sum(T.log(1. - n_o_p),axis=-1))*self.y_mask)/(T.sum(self.y_mask))#*(self.k+1))
att = T.nnet.softmax(T.dot(self.x, self.W) + self.b)
self.predict = T.argmax(att, axis=-1)
def output_func(self, input):
# P(Y|X) = softmax(W.X + b)
features = input[0]
session_info = input[1]
exam = 1 / (1 + T.exp(-T.dot(features, self.W[0]) - self.b[0]))
rel = 1 / (1 + T.exp(-T.dot(features, self.W[1]) - self.b[1]))
p_1 = exam * rel
#p_1 = 1 / (1 + T.exp(-T.dot(features, self.W) - self.b))
self.y_pred = p_1 > 0.5
self.p_y_given_x = T.horizontal_stack(1 - p_1, p_1)
#self.p_y_given_x = T.nnet.softmax(self._dot(input, self.W) + self.b)
#self.y_pred = T.argmax(self.p_y_given_x, axis=1)
#comput add loss
#q_info = session_info[:,0]
#u_info = session_info[:,1:]
r_info = session_info[:,1 + self.dim:]
self.rel_model_loss = T.pow(rel - r_info, 2)
#prev_rel = 1 / (1 + T.exp(-T.dot(features, self.R_W) - self.R_b))
#self.rel_const_loss = T.pow(rel - prev_rel, 2)
return self.y_pred
def build_model(model_):
global fn_predict, fn_record
global g_ozer, g_mdl
g_ozer = dict(simple=VanillaSGD, adam=AdamSGD)[OZER]()
g_ozer.lr = LEARN_RATE
s_x = T.tensor4('x')
s_y = T.ivector('y')
s_pdpo = T.scalar()
s_out = model_(s_x, s_pdpo)
s_y_onehot = T.extra_ops.to_one_hot(s_y, len(g_dataset.label_map))
s_loss = T.mean(-s_y_onehot*T.log(s_out + 1e-3))
s_accr = T.mean( T.switch(
T.eq(T.argmax(s_out, axis=1), T.argmax(s_y_onehot, axis=1)), 1, 0))
no_dropout = [(s_pdpo, T.constant(0., dtype=th.config.floatX))]
fn_predict = th.function(
[s_x, s_y],
{'pred':s_out, 'accr':s_accr, 'loss':s_loss},
givens=no_dropout, profile=PROFILE)
rec_fetches = {
'x': s_x, 'y': s_y,
'pred': s_out}
rec_fetches.update(g_mdl.params_di)
fn_record = th.function(
[s_x, s_y], rec_fetches, givens=no_dropout, profile=PROFILE)
g_ozer.compile(
[s_x, s_y],
s_loss,
g_mdl.params_di.values(),
fetches_={'pred': s_out, 'loss': s_loss, 'accr': s_accr},
givens_=[(s_pdpo, T.constant(TRAIN_PDPO, dtype=th.config.floatX))],
profile_=PROFILE)
def softmax_and_sample(logits):
old_shape = logits.shape
flattened_logits = logits.reshape((-1, logits.shape[logits.ndim-1]))
samples = T.cast(
srng.multinomial(pvals=T.nnet.softmax(flattened_logits)),
theano.config.floatX
).reshape(old_shape)
return T.argmax(samples, axis=samples.ndim-1)
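softmax_and_sample draws one categorical sample per row of logits and recovers its index by taking argmax over the sampled one-hot vector. A NumPy analogue of the same idea (illustrative; the original operates on symbolic Theano variables):

import numpy as np

rng = np.random.default_rng(0)
logits = rng.normal(size=(2, 5, 4))                       # e.g. (batch, time, vocab)
flat = logits.reshape(-1, logits.shape[-1])
e = np.exp(flat - flat.max(axis=1, keepdims=True))
p = e / e.sum(axis=1, keepdims=True)                      # row-wise softmax
onehots = np.stack([rng.multinomial(1, row) for row in p])
samples = np.argmax(onehots, axis=-1).reshape(logits.shape[:-1])  # integer samples, (2, 5)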
# TODO: Have a look at this benchmark:
# https://github.com/MaximumEntropy/cudnn_rnn_theano_benchmarks