def create_model(inp):
    out = (inp.astype(theano.config.floatX)/lib.floatX(Q_LEVELS-1) - lib.floatX(0.5))
    l_out = out.dimshuffle(0, 1, 'x')
    skips = []
    for i in range(args.wavenet_blocks):
        l_out, skip_out = create_wavenet_block(
            l_out,
            args.dilation_layers_per_block,
            1 if i == 0 else args.dim,
            args.dim,
            name="block_{}".format(i+1)
        )
        skips.append(skip_out)
    out = skips[-1]
    # Each block shortens the sequence by (2**dilation_layers_per_block - 1) samples,
    # so trim earlier (longer) skip outputs from the front before summing them.
    for i in range(args.wavenet_blocks - 1):
        out = out + skips[args.wavenet_blocks - 2 - i][:, (2**args.dilation_layers_per_block - 1)*(i+1):]
    for i in range(3):
        out = lib.ops.conv1d("out_{}".format(i+1), out, args.dim, args.dim, 1, non_linearity='relu')
    out = lib.ops.conv1d("final", out, args.dim, args.q_levels, 1, non_linearity='identity')
    return out
def gaussian_nll(x, mus, sigmas):
    """
    NLL for a multivariate Normal with diagonal covariance matrix.
    See:
        wikipedia.org/wiki/Multivariate_normal_distribution#Likelihood_function
    where \Sigma = diag(s_1^2, ..., s_n^2).
    x, mus, sigmas should all have the same shape.
    sigmas (s_1, ..., s_n) should be strictly positive.
    The output has the same shape as the inputs, minus the last dimension.
    """
    nll = lib.floatX(numpy.log(2. * numpy.pi))
    nll += 2. * T.log(sigmas)
    nll += ((x - mus) / sigmas) ** 2.
    nll = nll.sum(axis=-1)
    nll *= lib.floatX(0.5)
    return nll
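# Hedged sketch (not from the original repo): a NumPy-only reference for the same
# diagonal-Gaussian NLL, useful as a sanity check against the Theano graph above.
# The helper name and sample values are made up for illustration.
def _gaussian_nll_numpy_check():
    import numpy
    x = numpy.array([[0.1, -0.2]])
    mus = numpy.zeros((1, 2))
    sigmas = numpy.ones((1, 2))
    per_dim = numpy.log(2. * numpy.pi) + 2. * numpy.log(sigmas) + ((x - mus) / sigmas) ** 2
    return 0.5 * per_dim.sum(axis=-1)  # shape (1,), same formula as gaussian_nll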
def recurrent_fn_hred(x_t, h_tm1, hidden_dim, W1, b1, W2, b2):
    global DIM
    #A1 = T.nnet.sigmoid(lib.ops.BatchNorm(T.dot(T.concatenate((x_t,h_tm1),axis=1),W1),name="FrameLevel.GRU"+str(name)+".Input.",length=2*512) + b1)
    A1 = T.nnet.sigmoid(T.dot(T.concatenate((x_t, h_tm1), axis=1), W1) + b1)
    z = A1[:, :hidden_dim]  # update gate
    r = A1[:, hidden_dim:]  # reset gate
    scaled_hidden = r * h_tm1
    #h = T.tanh(lib.ops.BatchNorm(T.dot(T.concatenate((scaled_hidden,x_t),axis=1),W2),name="FrameLevel.GRU"+str(name)+".Output.",length=512)+b2)
    h = T.tanh(T.dot(T.concatenate((scaled_hidden, x_t), axis=1), W2) + b2)
    one = lib.floatX(1.0)
    return ((z * h) + ((one - z) * h_tm1)).astype('float32')
def Adam(cost, params, lr=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8, gradClip=True, value=1.):
    gparams = []
    grad_count = 1
    for param in params:
        gparam = T.grad(cost, param)
        if gradClip:
            gparam = T.clip(gparam, lib.floatX(-value), lib.floatX(value))
        gparams.append(gparam)
        print str(grad_count) + " completed"
        grad_count += 1
    updates = []
    # Note: this is a simplified Adam without the bias-correction terms.
    for p, g in zip(params, gparams):
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        m_new = beta1 * m + (1 - beta1) * g
        v_new = beta2 * v + (1 - beta2) * (g ** 2)
        gradient_scaling = T.sqrt(v_new + epsilon)
        updates.append((m, m_new))
        updates.append((v, v_new))
        # Parameter step uses the freshly updated first and second moments.
        updates.append((p, p - lr * m_new / gradient_scaling))
    return updates
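# Hedged usage sketch (not from the original repo): wiring the Adam updates into a
# theano.function for a toy least-squares problem. Assumes the usual `theano`,
# `theano.tensor as T`, and `numpy` imports used throughout this file; all names
# below are illustrative only.
def _adam_usage_sketch():
    x = T.matrix('x')  # (batch, 3)
    y = T.vector('y')  # (batch,)
    w = theano.shared(numpy.zeros(3, dtype=theano.config.floatX), name='w')
    cost = T.sqr(T.dot(x, w) - y).mean()
    train = theano.function([x, y], cost, updates=Adam(cost, [w], lr=1e-3))
    return train  # call train(x_batch, y_batch) in a loop to optimize w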
def DiagonalBiLSTM(name, input_dim, inputs):
    """
    inputs.shape: (batch size, height, width, input_dim)
    output.shape: (batch size, height, width, DIM)
    """
    forward = DiagonalLSTM(name+'.Forward', input_dim, inputs)
    backward = DiagonalLSTM(name+'.Backward', input_dim, inputs[:,:,::-1,:])[:,:,::-1,:]
    batch_size = inputs.shape[0]
    # Shift the backward (right-to-left) pass down one row so its contribution at
    # row i only covers rows above i, preserving the autoregressive ordering.
    backward = T.concatenate([
        T.zeros([batch_size, 1, WIDTH, DIM], dtype=theano.config.floatX),
        backward[:, :-1, :, :]
    ], axis=1)
    return forward + backward
# inputs.shape: (batch size, height, width, channels)
def create_wavenet_block(inp, num_dilation_layer, input_dim, output_dim, name=None):
    assert name is not None
    layer_out = inp
    skip_contrib = []
    skip_weights = lib.param(name+".parametrized_weights", lib.floatX(numpy.ones((num_dilation_layer,))))
    for i in range(num_dilation_layer):
        layer_out, skip_c = lib.ops.dil_conv_1D(
            layer_out,
            output_dim,
            input_dim if i == 0 else output_dim,
            2,
            dilation=2**i,
            non_linearity='gated',
            name=name+".dilation_{}".format(i+1)
        )
        skip_c = skip_c * skip_weights[i]
        skip_contrib.append(skip_c)
    skip_out = skip_contrib[-1]
    # Each width-2 'valid' dilated convolution shortens the sequence by its dilation,
    # so earlier skip contributions are longer; trim them from the front to align
    # with the deepest layer before summing (see the sketch below).
    j = 0
    for i in range(num_dilation_layer-1):
        j += 2**(num_dilation_layer-i-1)
        skip_out = skip_out + skip_contrib[num_dilation_layer-2 - i][:, j:]
    return layer_out, skip_out
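# Hedged illustration (not part of the original code): the trim offsets computed by
# the loop above, for a block with 4 dilation layers. The contribution of layer k is
# 2**num_dilation_layer - 2**(k+1) samples longer than the deepest one.
def _skip_trim_offsets(num_dilation_layer=4):
    offsets = []
    j = 0
    for i in range(num_dilation_layer - 1):
        j += 2 ** (num_dilation_layer - i - 1)
        offsets.append(j)
    return offsets  # e.g. [8, 12, 14]: trims applied to layers 2, 1, 0 respectively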
def uniform(stdev, size):
    """
    Uniform distribution with the given stdev and size.
    From Ishaan's code:
    https://github.com/igul222/speech
    """
    return numpy.random.uniform(
        low=-stdev * numpy.sqrt(3),
        high=stdev * numpy.sqrt(3),
        size=size
    ).astype(theano.config.floatX)
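# Hedged check (illustration only, not from the original code): U(-a, a) has standard
# deviation a / sqrt(3), so drawing from +/- stdev*sqrt(3) yields approximately the
# requested stdev. The helper name and defaults are made up.
def _uniform_stdev_check(stdev=0.05, n=1000000):
    samples = numpy.random.uniform(low=-stdev*numpy.sqrt(3), high=stdev*numpy.sqrt(3), size=n)
    return samples.std()  # approximately `stdev`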
def Embedding(name, n_symbols, output_dim, indices):
    vectors = lib.param(
        name,
        numpy.random.randn(
            n_symbols,
            output_dim
        ).astype(theano.config.floatX)
    )
    output_shape = [
        indices.shape[i]
        for i in xrange(indices.ndim)
    ] + [output_dim]
    return vectors[indices.flatten()].reshape(output_shape)
def softmax_and_sample(logits):
    old_shape = logits.shape
    flattened_logits = logits.reshape((-1, logits.shape[logits.ndim-1]))
    samples = T.cast(
        srng.multinomial(pvals=T.nnet.softmax(flattened_logits)),
        theano.config.floatX
    ).reshape(old_shape)
    return T.argmax(samples, axis=samples.ndim-1)
# TODO: Have a look at this benchmark:
# https://github.com/MaximumEntropy/cudnn_rnn_theano_benchmarks
def GMM_nll(x, mus, sigmas, mix_weights):
    """
    NLL of a Gaussian mixture model, where each component is a multivariate
    Normal with diagonal covariance matrix and D is the dimension of each
    observation (e.g. frame_size). See `gaussian_nll`.
    x           : (batch_size, D)
    mus         : (batch_size, D, num_gaussians)
    sigmas      : (batch_size, D, num_gaussians)
    mix_weights : (batch_size, num_gaussians)
    """
    x = x.dimshuffle(0, 1, 'x')
    # Per-component log-likelihood, similar to `gaussian_nll`
    ll_component_wise = lib.floatX(numpy.log(2. * numpy.pi))
    ll_component_wise += 2. * T.log(sigmas)
    ll_component_wise += ((x - mus) / sigmas) ** 2.
    ll_component_wise = ll_component_wise.sum(axis=1)  # sum over D (FRAME_SIZE)
    ll_component_wise *= lib.floatX(-0.5)  # LL, not NLL
    # Now weight each component. Simply applying exp could cause inf/NaN, so use
    # the log-sum-exp trick. See Softmax in Theano, or:
    # hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/
    weighted_ll = ll_component_wise + T.log(mix_weights)
    ll_max = T.max(weighted_ll, axis=1, keepdims=True)
    nll = T.log(T.sum(T.exp(weighted_ll - ll_max), axis=1, keepdims=True))
    nll += ll_max  # log sum_k exp(weighted_ll_k): the mixture log-likelihood
    nll = -nll.sum(axis=1)  # negate (and drop the kept axis) to get the NLL
    return nll
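# Hedged illustration (not from the original repo): the log-sum-exp trick used above,
# in plain NumPy. Subtracting the row-wise max before exponentiating avoids
# overflow/underflow without changing the result.
def _logsumexp_sketch(weighted_ll):
    import numpy
    m = weighted_ll.max(axis=1, keepdims=True)
    return m + numpy.log(numpy.exp(weighted_ll - m).sum(axis=1, keepdims=True))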
def T_one_hot(inp_tensor, n_classes):
    """
    :todo:
        - Implement other methods from here:
        - Compare them speed-wise for different sizes
        - Implement N_one_hot for Numpy version, with speed tests.
    Theano one-hot (1-of-k) from an input tensor of indices.
    If the indices are of the shape (a0, a1, ..., an) the output
    shape would be (a0, a1, ..., an, n_classes).
    :params:
        - inp_tensor: any Theano tensor with dtype int* as indices, all of
          them between [0, n_classes-1].
        - n_classes: number of classes, which determines the output size.
    :usage:
        >>> idx = T.itensor3()
        >>> idx_val = numpy.array([[[0,1,2,3],[4,5,6,7]]], dtype='int32')
        >>> one_hot = T_one_hot(idx, 8)
        >>> out = one_hot.eval({idx: idx_val})
        >>> print out
        array([[[[ 1., 0., 0., 0., 0., 0., 0., 0.],
                 [ 0., 1., 0., 0., 0., 0., 0., 0.],
                 [ 0., 0., 1., 0., 0., 0., 0., 0.],
                 [ 0., 0., 0., 1., 0., 0., 0., 0.]],
                [[ 0., 0., 0., 0., 1., 0., 0., 0.],
                 [ 0., 0., 0., 0., 0., 1., 0., 0.],
                 [ 0., 0., 0., 0., 0., 0., 1., 0.],
                 [ 0., 0., 0., 0., 0., 0., 0., 1.]]]])
        >>> print idx_val.shape, out.shape
        (1, 2, 4) (1, 2, 4, 8)
    """
    flattened = inp_tensor.flatten()
    z = T.zeros((flattened.shape[0], n_classes), dtype=theano.config.floatX)
    one_hot = T.set_subtensor(z[T.arange(flattened.shape[0]), flattened], 1)
    out_shape = [inp_tensor.shape[i] for i in xrange(inp_tensor.ndim)] + [n_classes]
    one_hot = one_hot.reshape(out_shape)
    return one_hot
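# Hedged sketch (not part of the original code): a NumPy counterpart along the lines
# of the N_one_hot mentioned in the docstring's TODO, using the same
# flatten / scatter / reshape strategy. The helper name is made up.
def _n_one_hot_sketch(inp_array, n_classes):
    import numpy
    flat = inp_array.flatten()
    z = numpy.zeros((flat.shape[0], n_classes), dtype='float32')
    z[numpy.arange(flat.shape[0]), flat] = 1.
    return z.reshape(inp_array.shape + (n_classes,))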
def FrameProcessor(frames):
    """
    frames.shape: (batch size, n frames, FRAME_SIZE)
    output.shape: (batch size, n frames, DIM)
    """
    embedded = lib.ops.Embedding('FrameEmbedding', Q_LEVELS, Q_LEVELS, frames)
    embedded = embedded.reshape((frames.shape[0], frames.shape[1], Q_LEVELS * FRAME_SIZE))
    output = MLP('FrameProcessor', FRAME_SIZE*Q_LEVELS, DIM, embedded)
    return output
    # frames = (frames.astype('float32') / lib.floatX(Q_LEVELS/2)) - lib.floatX(1)
    # frames *= lib.floatX(2)
    # output = MLP('FrameProcessor', FRAME_SIZE, DIM, frames)
    # return output
def Recurrence(processed_frames, h0, reset):
    """
    processed_frames.shape: (batch size, n frames, DIM)
    h0.shape: (batch size, N_GRUS, DIM)
    reset.shape: ()
    output.shape: (batch size, n frames, DIM)
    """
    # print "warning no recurrence"
    # return T.zeros_like(processed_frames), h0
    learned_h0 = lib.param(
        'Recurrence.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)
    gru0 = lib.ops.LowMemGRU('Recurrence.GRU0', DIM, DIM, processed_frames, h0=h0[:, 0])
    grus = [gru0]
    for i in xrange(1, N_GRUS):
        gru = lib.ops.LowMemGRU('Recurrence.GRU'+str(i), DIM, DIM, grus[-1], h0=h0[:, i])
        grus.append(gru)
    last_hidden = T.stack([gru[:,-1] for gru in grus], axis=1)
    return (grus[-1], last_hidden)
def Recurrent(name, hidden_dims, step_fn, inputs, non_sequences=[], h0s=None):
    if not isinstance(inputs, list):
        inputs = [inputs]
    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]
    if h0s is None:
        h0s = [None]*len(hidden_dims)
    for i in xrange(len(hidden_dims)):
        if h0s[i] is None:
            h0_unbatched = lib.param(
                name + '.h0_' + str(i),
                numpy.zeros((hidden_dims[i],), dtype=theano.config.floatX)
            )
            num_batches = inputs[0].shape[1]
            h0s[i] = T.alloc(h0_unbatched, num_batches, hidden_dims[i])
        h0s[i] = T.patternbroadcast(h0s[i], [False] * h0s[i].ndim)
    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )
    return outputs
def gaussian_nll(x, mu, log_sigma):
    """
    Element-wise NLL of a univariate Gaussian parametrized by mu and log sigma:
    0.5*log(2*pi) + log(sigma) + (x - mu)**2 / (2*sigma**2)
    """
    sigma_squared = T.exp(2*log_sigma)
    return (
        lib.floatX(0.5*numpy.log(2*numpy.pi)) +
        log_sigma +
        ( ((x-mu)**2) / (2*sigma_squared) )
    )
def softmax_and_sample(logits, temperature=1.):
    """
    :temperature: default 1.
    For high temperatures (temperature -> +Inf), all actions have nearly the
    same probability; the lower the temperature, the more the expected rewards
    affect the probability. For a low temperature (temperature -> 0+), the
    probability of the action with the highest expected reward (max operation)
    tends to 1.
    """
    temperature = lib.floatX(temperature)
    ZEROX = lib.floatX(0.)
    assert temperature >= ZEROX, "`temperature` should be a non-negative value!"
    old_shape = logits.shape
    flattened_logits = logits.reshape((-1, logits.shape[logits.ndim-1]))
    if temperature == ZEROX:
        # Take the max instead of a (biased) sample. Equivalent to taking the
        # argmax directly, but this way it is easier to extract the
        # probabilities later on too.
        samples = T.nnet.softmax(flattened_logits)
    else:  # temperature > 0
        flattened_logits /= temperature
        samples = T.cast(
            srng.multinomial(pvals=T.nnet.softmax(flattened_logits)),
            theano.config.floatX
        )
    samples = samples.reshape(old_shape)
    return T.argmax(samples, axis=samples.ndim-1)
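# Hedged NumPy illustration (not from the original repo) of how temperature reshapes
# the sampling distribution for a single row of logits: dividing by a small
# temperature sharpens the softmax towards its argmax.
def _temperature_softmax_sketch(logits, temperature=1.):
    import numpy
    scaled = numpy.asarray(logits, dtype='float64') / temperature
    scaled -= scaled.max()  # for numerical stability
    probs = numpy.exp(scaled) / numpy.exp(scaled).sum()
    return numpy.random.choice(len(probs), p=probs)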
def __Recurrent(name, hidden_dims, step_fn, inputs, non_sequences=[], h0s=None):
    if not isinstance(inputs, list):
        inputs = [inputs]
    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]
    if h0s is None:
        h0s = [None]*len(hidden_dims)
    for i in xrange(len(hidden_dims)):
        if h0s[i] is None:
            h0_unbatched = lib.param(
                name + '.h0_' + str(i),
                numpy.zeros((hidden_dims[i],), dtype=theano.config.floatX)
            )
            num_batches = inputs[0].shape[1]
            h0s[i] = T.alloc(h0_unbatched, num_batches, hidden_dims[i])
        h0s[i] = T.patternbroadcast(h0s[i], [False] * h0s[i].ndim)
    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )
    return outputs
def relu(x):
    # Using T.nnet.relu gives me NaNs. No idea why.
    return T.switch(x > lib.floatX(0), x, lib.floatX(0))
def sample_from_softmax(softmax_var):
    # softmax_var is assumed to be of shape (batch_size, num_classes)
    old_shape = softmax_var.shape
    softmax_var_reshaped = softmax_var.reshape((-1, softmax_var.shape[softmax_var.ndim-1]))
    return T.argmax(
        T.cast(
            srng.multinomial(pvals=softmax_var_reshaped),
            theano.config.floatX
        ).reshape(old_shape),
        axis=softmax_var.ndim-1
    )
# inputs.shape: (batch size, length, input_dim)
def floatX(num):
    if theano.config.floatX == 'float32':
        return np.float32(num)
    else:
        raise Exception("{} type not supported".format(theano.config.floatX))
def binarize(images):
    """
    Stochastically binarize values in [0, 1] by treating them as p-values of
    a Bernoulli distribution.
    """
    return (
        np.random.uniform(size=images.shape) < images
    ).astype(theano.config.floatX)
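# Hedged usage sketch (not from the original repo): binarize() applied to a toy
# "image" of constant probability; on average the fraction of ones matches the
# input p-value. Assumes the module's `np` and `theano` imports.
def _binarize_demo():
    probs = np.full((1000, 1000), 0.3, dtype='float32')
    return binarize(probs).mean()  # approximately 0.3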