def nll_loss_sharedparams(self, mus, sigmas, corxy, pis, y_true):
"""Negative log-likelihood of a bivariate Gaussian mixture, evaluated with the
log-sum-exp trick for numerical stability."""
mus_ex = mus[np.newaxis, :, :]
X = y_true[:, np.newaxis, :]
diff = X - mus_ex
diffprod = T.prod(diff, axis=-1)
corxy2 = corxy **2
diff2 = diff ** 2
sigmas2 = sigmas ** 2
sigmainvs = 1.0 / sigmas
sigmainvprods = sigmainvs[:, 0] * sigmainvs[:, 1]
diffsigma = diff2 / sigmas2
diffsigmanorm = T.sum(diffsigma, axis=-1)
z = diffsigmanorm - 2 * corxy * diffprod * sigmainvprods
oneminuscorxy2inv = 1.0 / (1.0 - corxy2)
expterm = -0.5 * z * oneminuscorxy2inv
new_exponent = T.log(0.5/np.pi) + T.log(sigmainvprods) + T.log(np.sqrt(oneminuscorxy2inv)) + expterm + T.log(pis)
max_exponent = T.max(new_exponent, axis=1, keepdims=True)
mod_exponent = new_exponent - max_exponent
gauss_mix = T.sum(T.exp(mod_exponent),axis=1)
log_gauss = max_exponent + T.log(gauss_mix)
loss = -T.mean(log_gauss)
return loss
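The mixture NLL above is the log-sum-exp of per-component bivariate Gaussian log-densities plus log mixture weights. As a sanity check, the single-component log-density it assembles (the z and oneminuscorxy2inv terms) can be verified against scipy in plain NumPy; the values below are arbitrary and only illustrative.

import numpy as np
from scipy.stats import multivariate_normal

mu = np.array([0.5, -1.0])
sx, sy, rho = 1.2, 0.7, 0.3
x = np.array([0.1, 0.2])
dx, dy = (x - mu) / np.array([sx, sy])             # standardized differences
z = dx ** 2 + dy ** 2 - 2.0 * rho * dx * dy        # same z as in the Theano graph
logp = -np.log(2.0 * np.pi * sx * sy * np.sqrt(1.0 - rho ** 2)) - z / (2.0 * (1.0 - rho ** 2))
cov = np.array([[sx ** 2, rho * sx * sy], [rho * sx * sy, sy ** 2]])
assert np.isclose(logp, multivariate_normal(mu, cov).logpdf(x))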
def log_marginal(self, y, h, py, q):
'''Computes the approximate log marginal.
Uses \log \sum p / q - \log N
Args:
y: T.tensor, target values.
h: T.tensor, latent samples.
py: T.tensor, conditional density p(y | h)
q: approximate posterior q(h | y)
Returns:
approximate log marginal.
'''
log_py_h = -self.conditional.neg_log_prob(y, py)
log_ph = -self.prior.neg_log_prob(h)
log_qh = -self.posterior.neg_log_prob(h, q)
assert log_py_h.ndim == log_ph.ndim == log_qh.ndim
log_p = log_py_h + log_ph - log_qh
log_p_max = T.max(log_p, axis=0, keepdims=True)
w = T.exp(log_p - log_p_max)
return (T.log(w.mean(axis=0, keepdims=True)) + log_p_max).mean()
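The estimator above is the usual importance-weighted log-marginal estimate log((1/N) * sum_i p(y|h_i) p(h_i) / q(h_i|y)), stabilized with the max trick. A toy NumPy sketch of the same computation, with placeholder log-probability arrays:

import numpy as np

log_py_h = np.random.randn(16)   # placeholder values for log p(y | h_i)
log_ph = np.random.randn(16)     # placeholder values for log p(h_i)
log_qh = np.random.randn(16)     # placeholder values for log q(h_i | y)
log_w = log_py_h + log_ph - log_qh
m = log_w.max()
log_marginal_est = m + np.log(np.exp(log_w - m).mean())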
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
active_next = T.cast(T.minimum(
T.maximum(
active + 1,
T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
), log_p_curr.shape[0]), 'int32')
common_factor = T.max(log_p_prev[:active])
p_prev = T.exp(log_p_prev[:active] - common_factor)
_p_prev = zeros[:active_next]
# copy over
_p_prev = T.set_subtensor(_p_prev[:active], p_prev)
# previous transitions
_p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
# skip transitions
_p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs])
updated_log_p_prev = T.log(_p_prev) + common_factor
log_p_next = T.set_subtensor(
zeros[:active_next],
log_p_curr[:active_next] + updated_log_p_prev
)
return active_next, log_p_next
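Ignoring the log-domain rescaling and the growing "active" window, the update above is one step of the standard CTC forward recursion. A rough NumPy sketch of the same step in plain probability space, with made-up toy values:

import numpy as np

alpha_prev = np.array([0.5, 0.3, 0.2, 0.0, 0.0])   # forward probs at t-1 over the padded label sequence
emit_curr = np.array([0.6, 0.1, 0.1, 0.1, 0.1])    # emission probs at t for the same positions
skip_idxs = np.array([1])                          # positions from which a skip over a blank is allowed
alpha = alpha_prev.copy()                          # staying on the same position
alpha[1:] += alpha_prev[:-1]                       # advancing by one position
alpha[skip_idxs + 2] += alpha_prev[skip_idxs]      # advancing by two (label, blank, different label)
alpha_curr = emit_curr * alpha                     # corresponds to log_p_curr + updated_log_p_prev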
def multiclass_hinge_loss(self, predictions, targets, delta=1):
num_cls = predictions.shape[1]
if targets.ndim == predictions.ndim - 1:
targets = T.extra_ops.to_one_hot(targets, num_cls)
elif targets.ndim != predictions.ndim:
raise TypeError('rank mismatch between targets and predictions')
corrects = predictions[targets.nonzero()]
rest = T.reshape(predictions[(1-targets).nonzero()],
(-1, num_cls-1))
rest = T.max(rest, axis=1)
return T.nnet.relu(rest - corrects + delta).mean()
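The same Crammer-Singer style hinge loss is easy to verify in NumPy on a small batch; the scores and targets below are illustrative only:

import numpy as np

preds = np.array([[2.0, 0.5, 1.0], [0.2, 1.5, 0.3]])      # (batch, num_cls) scores
targets = np.array([0, 2])
one_hot = np.eye(preds.shape[1], dtype=bool)[targets]
correct = preds[one_hot]                                   # score of the true class per row
rest_max = np.where(one_hot, -np.inf, preds).max(axis=1)   # best competing score per row
loss = np.maximum(rest_max - correct + 1.0, 0.0).mean()    # delta = 1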
def theano_logsumexp(x, axis=None):
"""
Compute log(sum(exp(x), axis=axis)) in a numerically stable
fashion.
Parameters
----------
x : tensor_like
A Theano tensor (any dimension will do).
axis : int or symbolic integer scalar, or None
Axis over which to perform the summation. `None`, the
default, performs over all axes.
Returns
-------
result : ndarray or scalar
The result of the log(sum(exp(...))) operation.
"""
xmax = T.max(x, axis=axis, keepdims=True)
xmax_ = T.max(x, axis=axis)
return xmax_ + T.log(T.exp(x - xmax).sum(axis=axis))
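A quick numerical check of why the max is subtracted first, in plain NumPy with arbitrary large values:

import numpy as np

x = np.array([1000.0, 1000.1, 999.9])
naive = np.log(np.exp(x).sum())            # overflows: exp(1000) -> inf
m = x.max()
stable = m + np.log(np.exp(x - m).sum())   # finite (about 1001.1)
print(naive, stable)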
def compile_train(self, *args):
# args is a list of dictionaries
if self.verbose: print('compiling training function...')
import theano
for arg_list in args:
self.compiled_train_fn_list.append(theano.function(**arg_list))
if self.monitor_grad:
norms = [grad.norm(L=2) for grad in self.grads]
import theano.tensor as T
norms = T.log10(norms)
self.get_norm = theano.function([self.subb_ind], [T.sum(norms), T.max(norms)],
givens=[(self.x, self.shared_x_slice),
(self.y, self.shared_y_slice)]
)
def pool(self, input, window, mode, stride, pad, autopad):
# map the public mode names onto Theano's pooling mode strings
mode = {'max': 'max', 'sum': 'sum', 'avg': 'average_exc_pad', 'avgpad': 'average_inc_pad'}.get(mode, 'sum')
if input.ndim == 4:
return P.pool_2d(input=input, ws=window, ignore_border=not autopad, stride=stride, pad=pad, mode=mode)
elif input.ndim == 5:
return P.pool_3d(input=input, ws=window, ignore_border=not autopad, stride=stride, pad=pad, mode=mode)
else:
basic.defaultreturn()
def needed_key(self):
return self._needed_key_impl('activation_fn')
# class MaxPool(Chip):
# ''' This class_chip collapses the input tensor by max pooling along its last dimension.
# '''
# def construct(self, input_tv):
# pool_size = self.prm('pool_size')
# y = T.reshape(input_tv,
# ([input_tv.shape[i] for i in range(input_tv.ndim - 1)]
# + [T.floor_div(input_tv.shape[input_tv.ndim - 1], pool_size).astype('int32'), pool_size]),
# ndim=input_tv.ndim + 1)
# self.output_tv = T.max(y, axis=y.ndim - 1)
# return tuple()
# def needed_key(self):
# return self._needed_key_impl('pool_size')
def get_pooling_padding_and_theano_pool_mode(
pool_size, border_mode, pool_mode):
if border_mode == BorderMode.same:
padding = [x - (2 if x%2==1 else 1) for x in pool_size]
elif border_mode == BorderMode.valid:
padding = (0, 0)
else:
raise RuntimeError("Valid border modes are: "+str(BorderMode.vals)
+", got: "+str(border_mode))
if (pool_mode == PoolMode.max):
theano_pool_mode = 'max'
elif (pool_mode == PoolMode.avg):
theano_pool_mode = 'average_exc_pad'
else:
raise RuntimeError("Valid pool modes are: "+str(PoolMode.vals)
+", got: "+str(pool_mode))
return padding, theano_pool_mode
def get_output(self, train=False):
print(len(self.layers))
u=self.layers[0].get_output(train)
t=self.layers[1].get_output(train)
#tp=t[0]
#tn=t[1]
#un=T.dot(u,u)
#return [T.dot(u,tp)/(un*T.dot(tp,tp)) ,T.dot(u,tn)/(un*T.dot(tn,tn))]
#theano.printing.pprint('vals')
#x=T.dvector()
#printed_u = hello_world_op(x)
#f = theano.function([x], printed_u)
#f(['here'])
#T.reshape(u,[2,1])
#T.reshape(t,[1,2,2])
#d=T.dot(t.dimshuffle(1, 0, 2), u)
#u1=self.activation(u)
#t.reshape([2,2,2])
return T.max(([u, u] * t.dimshuffle(1, 0, 2)).dimshuffle(1, 0, 2), 2)  # .reshape([2,2])
#return d.dimshuffle(1,0,2) #just dot product
def test_kmax_pool():
nbatches, nkernels_in, nwords, ndim = 2, 1, 5, 3
input_shape = (nbatches, nkernels_in, nwords, ndim)
input = T.tensor4('input')
k = 3
f_kmax = theano.function([input], k_max_pooling(input, k))
f_max = theano.function([input], max_pooling(input))
image_data = np.arange(np.prod(input_shape), dtype=np.float64)
np.random.shuffle(image_data)
image_data = image_data.reshape(input_shape)
print(image_data)
print('kmax')
print(f_kmax(image_data))
print('max')
print(f_max(image_data))
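k_max_pooling itself is not shown on this page; a possible NumPy equivalent, keeping the k largest values along the word axis in their original order, could look like the sketch below. This is an assumption about its behavior, not the implementation the test actually imports.

import numpy as np

x = np.random.randn(2, 1, 5, 3)               # same (batch, kernels, words, dim) layout as above
k, axis = 3, 2
idx = np.argpartition(x, -k, axis=axis).take(range(x.shape[axis] - k, x.shape[axis]), axis=axis)
idx = np.sort(idx, axis=axis)                 # keep the k selected words in their original order
kmax = np.take_along_axis(x, idx, axis=axis)  # shape (2, 1, 3, 3); plain max pooling would be x.max(axis=axis)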
def inv(self, output):
output = output.dimshuffle(0,1,2,'x').repeat(self.pool_shape[0], axis=3)
if self.depooler == 'random':
mask = self.theano_rng.uniform(size=output.shape, dtype=theano.config.floatX)
mask = T.floor(mask / mask.max(axis=3).dimshuffle(0,1,2,'x'))
output = mask * output
elif self.depooler == 'first':
mask_np = np.zeros(self.pool_shape, dtype=theano.config.floatX)
mask_np[0] = 1.0
mask = theano.shared(mask_np, borrow=True).dimshuffle('x','x','x',0)
output = mask * output
else:
output = self.depooler(output, axis=3)
return output.reshape(self.input_shape)
def get_pooling_batch(hs, mask, pooling_method):
"""
:param hs: (batch, len, dim)
:param mask: (batch, len)
:param pooling_method:
:return:
"""
if pooling_method == 'max':
add_v = ((1 - mask) * -BIG_INT)[:, :, None]
return T.max(hs + add_v, axis=1)
elif pooling_method == 'min':
add_v = ((1 - mask) * BIG_INT)[:, :, None]
return T.min(hs + add_v, axis=1)
elif pooling_method in ['averaging', 'mean', 'average']:
return T.sum(hs * mask[:, :, None], axis=1) / T.sum(mask, axis=1)[:, None]
elif pooling_method == 'sum':
return T.sum(hs * mask[:, :, None], axis=1)
elif pooling_method in ['final', 'last']:
return hs[:, -1, :]
else:
raise NotImplementedError('Not implemented pooling method: {}'.format(pooling_method))
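The masking idea is easier to see in NumPy: padded positions are pushed to -BIG_INT before the max (or zeroed before the sum). A small illustrative example, where BIG stands in for the module-level BIG_INT:

import numpy as np

BIG = 1e9                                    # stand-in for BIG_INT
hs = np.random.randn(2, 4, 3)                # (batch, len, dim)
mask = np.array([[1., 1., 0., 0.], [1., 1., 1., 1.]])
masked_max = (hs + ((1 - mask) * -BIG)[:, :, None]).max(axis=1)
masked_mean = (hs * mask[:, :, None]).sum(axis=1) / mask.sum(axis=1)[:, None]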
def mlp_layer_softmax(tparams, layer1_input, prefix='mlp_layer'):
""" layer1_input: n_sample * n_feature 64*20
input_shape: (num of hiddens, number of input features) 200*20
pred_shape: (num of labels, number of hiddens) 2*200
y_recon : n_label *n_sample 2*64
"""
hidden_2_out = tensor.nnet.sigmoid(tensor.dot(layer1_input, tparams[_p(prefix,'W1')].T) + tparams[_p(prefix,'b1')] ) # 64*200
y_recons = tensor.dot(hidden_2_out, tparams[_p(prefix,'V1')].T) + tparams[_p(prefix,'c1')]
#y_recons = tensor.tanh(y_recons) * 10 # avoid numerical issues/label smoothing
#y_recons = tensor.nnet.softmax(y_recons) # 64*2
max_w = tensor.max(y_recons, axis=1, keepdims=True)
e0 = tensor.exp(y_recons - max_w)
y_recons = e0 / tensor.sum(e0, axis=1, keepdims=True)
return y_recons
def test_local_reduce_broadcast_some_0(self):
for fct in [tensor.sum, tensor.all, tensor.any, tensor.prod,
tensor.max, tensor.min]:
x = T.TensorType('int64', (True, False, True))()
f = theano.function([x], [fct(x, axis=[0, 1])], mode=self.mode)
order = f.maker.fgraph.toposort()
assert 1 == sum([isinstance(node.op, T.CAReduce)
for node in order])
node = [node for node in order if isinstance(node.op,
tensor.CAReduce)][0]
op = node.op
assert isinstance(op, T.CAReduce)
# -- the leading broadcastable dimension has been dropped
# by the local_reduce_broadcastable optimization
# now summation is over the original x's dimension 1.
assert node.inputs[0].ndim == 2, node
assert op.axis == (0,), op.axis
def test_optimization(self):
# If we use only the max output, we should replace this op with
# a faster one.
mode = theano.compile.mode.get_default_mode().including(
'canonicalize', 'fast_run')
for axis in [0, 1, -1]:
data = numpy.asarray(numpy.random.rand(2, 3), dtype=config.floatX)
n = tensor.matrix()
f = function([n], tensor.max_and_argmax(n, axis)[0], mode=mode)
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, CAReduce)
f = function([n], tensor.max_and_argmax(n, axis), mode=mode)
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, tensor.MaxAndArgmax)
def sparse_tuple_from(sequences, dtype=np.int32):
"""Create a sparse representention of x.
Args:
sequences: a list of lists of type dtype where each element is a sequence
Returns:
A tuple with (indices, values, shape)
"""
indices = []
values = []
for n, seq in enumerate(sequences):
indices.extend(zip([n]*len(seq), range(len(seq))))
values.extend(seq)
indices = np.asarray(indices, dtype=np.int64)
values = np.asarray(values, dtype=dtype)
shape = np.asarray([len(sequences), np.asarray(indices).max(0)[1]+1], dtype=np.int64)
return indices, values, shape
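For example, on a tiny batch of two sequences:

indices, values, shape = sparse_tuple_from([[1, 2, 3], [4, 5]])
# indices -> [[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]]
# values  -> [1, 2, 3, 4, 5]
# shape   -> [2, 3]   (batch size, longest sequence length)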
def log_sum_exp(x, axis=1):
# note: the dimshuffle(0, 'x') broadcast assumes a 2-D input reduced over axis=1
m = T.max(x, axis=axis)
return m + T.log(T.sum(T.exp(x - m.dimshuffle(0, 'x')), axis=axis))
def softmax_loss(p_true, output_before_softmax):
output_before_softmax -= T.max(output_before_softmax, axis=1, keepdims=True)
if p_true.ndim==2:
return T.mean(T.log(T.sum(T.exp(output_before_softmax),axis=1)) - T.sum(p_true*output_before_softmax, axis=1))
else:
return T.mean(T.log(T.sum(T.exp(output_before_softmax),axis=1)) - output_before_softmax[T.arange(p_true.shape[0]),p_true])
def GMM_nll(x, mus, sigmas, mix_weights):
"""
D is dimension of each observation (e.g. frame_size) for each component
(multivariate Normal with diagonal covariance matrix)
See `gaussian_nll`
x : (batch_size, D)
mus : (batch_size, D, num_gaussians)
sigmas : (batch_size, D, num_gaussians)
mix_weights : (batch_size, num_gaussians)
"""
x = x.dimshuffle(0, 1, 'x')
# Similar to `gaussian_nll`
ll_component_wise = lib.floatX(numpy.log(2. * numpy.pi))
ll_component_wise += 2. * T.log(sigmas)
ll_component_wise += ((x - mus) / sigmas) ** 2.
ll_component_wise = ll_component_wise.sum(axis=1) # on FRAME_SIZE
ll_component_wise *= lib.floatX(-0.5) # LL not NLL
# Now ready to take care of weights of each component
# Simply applying exp could potentially cause inf/NaN.
# Look up LogSumExp trick, Softmax in theano, or this:
# hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/
weighted_ll = ll_component_wise + T.log(mix_weights)
ll_max = T.max(weighted_ll, axis=1, keepdims=True)
# log-sum-exp over mixture components (still a log-likelihood at this point)
log_ll = T.log(T.sum(T.exp(weighted_ll - ll_max), axis=1, keepdims=True)) + ll_max
nll = -log_ll.sum(axis=1)
return nll