def derivative(self, input=None):
"""The derivative of sigmoid is
.. math:: \\frac{dy}{dx} & = (1-\\varphi(x)) \\otimes \\varphi(x) \\\\
& = \\frac{e^{-x}}{(1+e^{-x})^2} \\\\
& = \\frac{e^x}{(1+e^x)^2}
Returns
-------
float32
The derivative of the sigmoid function.
"""
last_forward = self.forward(input) if input is not None else self.last_forward
return np.multiply(last_forward, 1 - last_forward)
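# A minimal, standalone numeric check of the identity above (an illustrative
# sketch, not part of the original class): sigma'(x) == sigma(x) * (1 - sigma(x)).
import numpy as np

def _sigmoid_check(x):
    return 1.0 / (1.0 + np.exp(-x))

_x = np.linspace(-5.0, 5.0, 11)
_eps = 1e-6
_analytic = _sigmoid_check(_x) * (1 - _sigmoid_check(_x))
_numeric = (_sigmoid_check(_x + _eps) - _sigmoid_check(_x - _eps)) / (2 * _eps)
assert np.allclose(_analytic, _numeric, atol=1e-6)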
# sigmoid-end
# tanh-start
# Python tanh() usage examples
def forward(self, input):
"""This function is easily defined as the ratio between the hyperbolic
sine and the cosine functions (or expanded, as the ratio of the
half-difference and half-sum of two exponential functions in the
points :math:`z` and :math:`-z`):
.. math:: tanh(z) & = \\frac{sinh(z)}{cosh(z)} \\\\
& = \\frac{e^z - e^{-z}}{e^z + e^{-z}}
Fortunately, numpy provides a :meth:`tanh` method, so in our implementation
we directly use :math:`\\varphi(x) = \\tanh(x)`.
Parameters
----------
input : float32
The activation (the summed, weighted input of a neuron).
Returns
-------
float32 in [-1, 1]
The output of the tanh function applied to the activation.
"""
self.last_forward = np.tanh(input)
return self.last_forward
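# Quick standalone check (illustrative sketch) that np.tanh matches the
# exponential form quoted in the docstring: tanh(z) = (e^z - e^-z) / (e^z + e^-z).
import numpy as np

_z = np.linspace(-3.0, 3.0, 13)
assert np.allclose(np.tanh(_z), (np.exp(_z) - np.exp(-_z)) / (np.exp(_z) + np.exp(-_z)))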
def derivative(self, input=None):
"""The derivative of :meth:`tanh` functions is
.. math:: \\frac{d}{dx} tanh(x) & = \\frac{d}{dx} \\frac{sinh(x)}{cosh(x)} \\\\
& = \\frac{cosh(x) \\frac{d}{dx}sinh(x) - sinh(x) \\frac{d}{dx}cosh(x) }{ cosh^2(x)} \\\\
& = \\frac{ cosh(x) cosh(x) - sinh(x) sinh(x) }{ cosh^2(x)} \\\\
& = 1 - tanh^2(x)
Returns
-------
float32
The derivative of the tanh function.
"""
last_forward = self.forward(input) if input is not None else self.last_forward
return 1 - np.power(last_forward, 2)
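# Analogous standalone check (illustrative sketch) for the derivative identity
# above: d/dx tanh(x) == 1 - tanh(x)**2, via a central finite difference.
import numpy as np

_x = np.linspace(-3.0, 3.0, 13)
_eps = 1e-6
_analytic = 1 - np.tanh(_x) ** 2
_numeric = (np.tanh(_x + _eps) - np.tanh(_x - _eps)) / (2 * _eps)
assert np.allclose(_analytic, _numeric, atol=1e-6)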
# tanh-end
# relu-start
def __init__(self, n_out, n_in=None, nb_batch=None, nb_seq=None,
init='glorot_uniform', inner_init='orthogonal',
activation='tanh', return_sequence=False):
self.n_out = n_out
self.n_in = n_in
self.nb_batch = nb_batch
self.nb_seq = nb_seq
self.init = initializations.get(init)
self.inner_init = initializations.get(inner_init)
self.activation_cls = activations.get(activation).__class__
self.activation = activations.get(activation)
self.return_sequence = return_sequence
self.out_shape = None
self.last_input = None
self.last_output = None
def sample(h, seed_ix, n):
"""
sample a sequence of integers from the model
h is memory state, seed_ix is seed letter for first time step
"""
x = np.zeros((vocab_size, 1))
x[seed_ix] = 1
ixes = []
for t in range(n):
h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
y = np.dot(Why, h) + by
p = np.exp(y) / np.sum(np.exp(y))
ix = np.random.choice(range(vocab_size), p=p.ravel())
x = np.zeros((vocab_size, 1))
x[ix] = 1
ixes.append(ix)
return ixes
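# Hypothetical usage sketch for sample(): the real Wxh/Whh/Why/bh/by come from
# training in the surrounding min-char-rnn style script; toy weights are used
# here only to make the call self-contained.
import numpy as np

vocab_size, hidden_size = 5, 8
Wxh = np.random.randn(hidden_size, vocab_size) * 0.01
Whh = np.random.randn(hidden_size, hidden_size) * 0.01
Why = np.random.randn(vocab_size, hidden_size) * 0.01
bh = np.zeros((hidden_size, 1))
by = np.zeros((vocab_size, 1))

h0 = np.zeros((hidden_size, 1))      # fresh memory state
idxs = sample(h0, seed_ix=0, n=20)   # 20 sampled character indices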
def __init__(self, n_in, n_out, activation=tanh,
clip_gradients=False, init_zero=False):
self.n_in = n_in
self.n_out = n_out
self.activation = activation
self.clip_gradients = clip_gradients
#self.in_gate = RecurrentLayer(n_in, n_out, sigmoid, clip_gradients, init_zero)
#self.forget_gate = RecurrentLayer(n_in, n_out, sigmoid, clip_gradients, init_zero)
#self.out_gate = RecurrentLayer(n_in, n_out, sigmoid, clip_gradients, init_zero)
self.in_gate = RecurrentLayer(n_in+n_out, n_out, sigmoid, clip_gradients, init_zero)
self.out_gate = RecurrentLayer(n_in+n_out, n_out, sigmoid, clip_gradients, init_zero)
self.input_layer = RecurrentLayer(n_in, n_out, activation, clip_gradients, init_zero)
self.internal_layers = [ self.input_layer, self.in_gate,
self.out_gate]#, self.forget_gate]
def __init__(self, n_in, n_out, activation=tanh,
order=1, clip_gradients=False, BN=False):
self.n_in = n_in
self.n_out = n_out
self.activation = activation
self.order = order
self.clip_gradients = clip_gradients
# batch, in, row, col
self.input_shape = (None, n_in, 1, None)
# out, in, row, col
self.filter_shape = (n_out, n_in, 1, order)
self.W = create_shared(random_init(self.filter_shape), name="W")
if not BN:
self.bias = create_shared(random_init((n_out,)), name="bias")
self.BNLayer = None
self.BN = BN
if BN:
# calculate appropriate input_shape, (mini_batch_size, # of channel, # row, # column)
new_shape = list(self.input_shape)
new_shape[1] = self.filter_shape[0]
new_shape = tuple(new_shape)
self.BNLayer = BatchNormalization(new_shape, mode=1)
def _feed_forward(self, x):
time_steps = len(x)
initial_hidden_state = np.zeros(self.hidden_layer_size)
hidden_state = deque([initial_hidden_state])
softmax_outputs = deque()
for t in np.arange(time_steps):
hidden_state.append(
np.tanh( self.parameters.W_xh.value[:, x[t]] + self.parameters.W_hh.value @ hidden_state[-1] )
)
softmax_outputs.append(
self._compute_softmax( self.parameters.W_hy.value @ hidden_state[-1] )
)
# move initial hidden state to end of deque, such that it is later our
# `hidden_state[t-1]` at t=0
hidden_state.rotate(-1)
return np.array(softmax_outputs), np.array(hidden_state)
def forward(self, inputs, targets, hidden_prev):
# the *_xs / *_s / *_ys dicts below map each time step to a vector
input_xs = {}
hidden_s = {}
output_ys = {}
probs = {}  # probability
hidden_s[-1] = np.copy(hidden_prev)
loss = 0
for i in xrange(len(inputs)):
# Creating an equivalent one-hot vector for each input
input_xs[i] = np.zeros((self.vocab_size, 1))
input_xs[i][inputs[i]] = 1
# Calculating the current hidden state from the previous hidden state through tanh
hidden_s[i] = self.tanh(self.param_w_xh, input_xs[i], self.param_w_hh, hidden_s[i - 1], self.bias_hidden)
output_ys[i] = np.dot(self.param_w_hy, hidden_s[i]) + self.bias_output_y
probs[i] = self.softmax(output_ys[i])
loss += -np.log(probs[i][targets[i], 0])
return input_xs, output_ys, hidden_s, probs, loss
# backprop
def generate(self, hidden, seed_ix, chars_counter):
input_x = np.zeros((self.vocab_size, 1))
input_x[seed_ix] = 1
ixes = []
for i in xrange(chars_counter):
hidden = np.tanh(np.dot(self.param_w_xh, input_x) + np.dot(self.param_w_hh, hidden) + self.bias_hidden) # tanh
output_y = np.dot(self.param_w_hy, hidden) + self.bias_output_y
prob = self.softmax(output_y)
ix = np.random.choice(range(self.vocab_size), p=prob.ravel())
input_x = np.zeros((self.vocab_size, 1))
input_x[ix] = 1
ixes.append(ix)
return [self.ix_to_char[ix] for ix in ixes]
def __init__(self, in_size, hidden_size, encoder_activation='tanh',
decoder_activation='tanh', decoder_return_sequence=True):
assert encoder_activation in ('tanh', 'identity', ), "invalid encoder_activation"
self.encoder_activation = encoder_activation
assert decoder_activation in ('tanh', 'identity', ), "invalid decoder_activation"
self.decoder_activation = decoder_activation
self.hidden_size = hidden_size
self.in_size = in_size
# encoder
self.Wxh_enc = np.zeros((hidden_size, in_size)) # input to hidden
self.Whh_enc = np.zeros((hidden_size, hidden_size)) # hidden to hidden
self.bh_enc = np.zeros((hidden_size, 1)) # hidden bias
# decoder
self.Wxh_dec = np.zeros((hidden_size, in_size)) # input to hidden
self.Whh_dec = np.zeros((hidden_size, hidden_size)) # hidden to hidden
self.bh_dec = np.zeros((hidden_size, 1)) # hidden bias
self.decoder_return_sequence = decoder_return_sequence
def density_profile(rho):
"""density profile, fixed in time.
Inputs:
rho normalized radial coordinate rho=r/a (array)
Outputs:
n density profile in SI (array)
"""
minorRadius = 0.594 # a
majorRadius = 1.65 # R0
inverseAspectRatio = minorRadius / majorRadius
rho0 = 0.5
# density profile
n0 = 3.3e19  # in SI, m^-3
kappa_n = 2.22  # R0 / Ln
deltar = 0.5
rhominus = rho - rho0 + deltar/2
deltan = 0.1
n = n0 * np.exp( -kappa_n * inverseAspectRatio * (rho - rho0 - deltan * (np.tanh(rhominus/deltan) - np.tanh(deltar/2/deltan))))
# set n to a constant for rho < rho0-deltar/2
ind = int(np.abs(rho - (rho0 - deltar/2)).argmin())
ind2 = (rho < (rho0-deltar/2))
n[ind2] = n[ind]
return n
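# Quick usage sketch (assumes numpy is imported as np, as in the function body):
# evaluate the profile on a uniform rho grid; values inside rho0 - deltar/2 are
# held constant by the final indexing step above.
rho_grid = np.linspace(0.0, 1.0, 101)
n_profile = density_profile(rho_grid)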
def temperature_initial_condition(rho):
"""Initial temperature profile
Inputs:
rho normalized radial coordinate rho=r/a (array)
Outputs:
T temperature profile in SI (array)
"""
e = 1.60217662e-19 # electron charge
kappa_T = 6.96
deltar = 0.9
rho0 = 0.5
rhominus = rho - rho0 + deltar/2
deltaT = 0.1
T0 = 1000*e
invasp = 0.36
T = T0 * np.exp( -kappa_T * invasp * (rho - rho0 - deltaT * (np.tanh(rhominus/deltaT) - np.tanh(deltar/2/deltaT))))
ind = int(np.abs(rho - (rho0 - deltar/2)).argmin())
ind2 = (rho < (rho0-deltar/2))
T[ind2] = T[ind]
return T
def test_basic(self):
with tf.Graph().as_default(), self.test_session() as sess:
rnd = np.random.RandomState(0)
x = self.get_random_tensor([18, 12], rnd=rnd)
y = tf.tanh(x)
self.assert_bw_fw(sess, x, y, rnd=rnd)
def test_manual(self):
with tf.Graph().as_default(), tf.device("/cpu:0"):
with self.test_session() as sess:
x_val = np.random.uniform(0, 1)
x = tf.constant(x_val)
y = tf.tanh(x)
dy_dx = forward_gradients(y, x, gate_gradients=True)
dy_dx_tf = sess.run(dy_dx)
eps = 1e-5
x_val = x_val - eps
y_val_1 = np.tanh(x_val)
x_val = x_val + 2 * eps
y_val_2 = np.tanh(x_val)
dy_dx_fd = (y_val_2 - y_val_1) / (2 * eps)
np.testing.assert_allclose(dy_dx_tf, dy_dx_fd, rtol=1e-5)
def calculate_loss(self, X, y, model):
num_examples = len(X)
lamda = 0.01 # regularization strength
Wi, bh, Wh, bo = model['Wi'], model['bh'], model['Wh'], model['bo']
# Forward propagation to calculate our predictions
neth = np.dot(X, Wi) + bh
lh = np.tanh(neth)
neto = np.dot(lh, Wh) + bo
lo = np.exp(neto)
probs = lo / np.sum(lo, axis=1, keepdims=True)
# Calculating the loss
correct_logprobs = -np.log(probs[range(num_examples), y])
data_loss = np.sum(correct_logprobs)
# Add regulatization term to loss (optional)
data_loss += lamda/2 * (np.sum(np.square(Wi)) + np.sum(np.square(Wh)))
return 1./num_examples * data_loss
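# Hypothetical usage sketch: toy shapes only; `model` keys follow the code above,
# and `net` would be an instance of the surrounding class.
import numpy as np

X_toy = np.random.randn(6, 2)                     # 6 examples, 2 features
y_toy = np.random.randint(0, 3, size=6)           # 3 classes
model_toy = {'Wi': np.random.randn(2, 4), 'bh': np.zeros((1, 4)),
             'Wh': np.random.randn(4, 3), 'bo': np.zeros((1, 3))}
# loss = net.calculate_loss(X_toy, y_toy, model_toy)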
def actFctDerFromOutput(x):
"""
Derivative of the activation function.
WARNING: In this version, we take as input an output value
after the activation function (x = tanh(output of the tensor)).
This works because the derivative of tanh is a function of tanh itself.
"""
return 1.0 - x**2
#def actFctDer(x):
#"""
#Derivative of the activation function
#"""
#return 1.0 - np.tanh(x)**2
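# Standalone check (illustrative sketch) of the point made in the docstring:
# feeding the *output* of tanh into actFctDerFromOutput reproduces 1 - tanh(x)**2.
import numpy as np

_x = np.linspace(-2.0, 2.0, 9)
assert np.allclose(actFctDerFromOutput(np.tanh(_x)), 1.0 - np.tanh(_x) ** 2)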
# Other utils functions
def conditional_logdensities(self, x_lt_i, range):
raise(Exception("Not implemented"))
W = self.W.get_value()
V_alpha = self.V_alpha.get_value()
b_alpha = self.b_alpha.get_value()
V_mu = self.V_mu.get_value()
b_mu = self.b_mu.get_value()
V_sigma = self.V_sigma.get_value()
b_sigma = self.b_sigma.get_value()
activation_rescaling = self.activation_rescaling.get_value()
# Calculate
i = len(x_lt_i)
a = W[0, :] + np.dot(x_lt_i, W[1:len(x_lt_i) + 1, :])
h = self.parameters["nonlinearity"].get_numpy_f()(a * activation_rescaling[i])
alpha = Utils.nnet.softmax(np.tanh(np.dot(h, V_alpha[i]) + b_alpha[i]) * 10.0) # C
Mu = np.dot(h, V_mu[i]) + b_mu[i] # C
Sigma = np.log(1.0 + np.exp((np.dot(h, V_sigma[i]) + b_sigma[i]) * 10)) / 10 # C
def ld(x):
lds = np.array([scipy.stats.norm.logpdf(x, Mu[c], Sigma[c]) for c in xrange(self.n_components)])
return Utils.nnet.logsumexp(lds + np.log(alpha))
return np.array([ld(x) for x in range])
def bottom_data_is(self, x, s_prev = None, h_prev = None):
# if this is the first lstm node in the network
if s_prev is None: s_prev = np.zeros_like(self.state.s)
if h_prev is None: h_prev = np.zeros_like(self.state.h)
# save data for use in backprop
self.s_prev = s_prev
self.h_prev = h_prev
# concatenate x(t) and h(t-1)
xc = np.hstack((x, h_prev))
self.state.g = np.tanh(np.dot(self.param.wg, xc) + self.param.bg)
self.state.i = sigmoid(np.dot(self.param.wi, xc) + self.param.bi)
self.state.f = sigmoid(np.dot(self.param.wf, xc) + self.param.bf)
self.state.o = sigmoid(np.dot(self.param.wo, xc) + self.param.bo)
self.state.s = self.state.g * self.state.i + s_prev * self.state.f
self.state.h = self.state.s * self.state.o
self.x = x
self.xc = xc
def nonlin_poly(self, u):
"""nonlin_poly
ip2d.motortransfer_func legacy
"""
# olimm1 = 0.5
olim = 2
# z = array([ 0.27924011, 0.12622341, 0.0330395, -0.00490162])
# z = array([ 0.00804775, 0.00223221, -0.1456263, -0.04297434, 0.74612441, 0.26178644, -0.01953301, -0.00243736])
# FIXME: somewhere there's a separate script for generating the coeffs
z = array([9.46569349e-04, 4.84698808e-03, -1.64436822e-02, -8.76479549e-02, 7.67630339e-02, 4.48107332e-01, -4.53365904e-03, -2.69288039e-04, 1.18423789e-15])
p3 = poly1d(z)
# print "pre", self.ip2d.u[ti]
# self.ip2d.u[ti] = p3(tanh(self.ip2d.u[ti]) * self.olim)
y = p3(tanh(u) * olim)
return y
def make_nn_funs(layer_sizes, L2_reg):
parser = WeightsParser()
for i, shape in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
parser.add_weights(('weights', i), shape)
parser.add_weights(('biases', i), (1, shape[1]))
def predictions(W_vect, X):
cur_units = X
for i in range(len(layer_sizes) - 1):
cur_W = parser.get(W_vect, ('weights', i))
cur_B = parser.get(W_vect, ('biases', i))
cur_units = np.tanh(np.dot(cur_units, cur_W) + cur_B)
return cur_units - logsumexp(cur_units, axis=1)
def loss(W_vect, X, T):
log_prior = -L2_reg * np.dot(W_vect, W_vect)
log_lik = np.sum(predictions(W_vect, X) * T)
return - log_prior - log_lik
def frac_err(W_vect, X, T):
return np.mean(np.argmax(T, axis=1) != np.argmax(predictions(W_vect, X), axis=1))
return parser.N, predictions, loss, frac_err
def forward_prop_step(self,x_t, s_t1_prev, s_t2_prev):
# This is how we calculated the hidden state in a simple RNN. No longer!
# s_t = T.tanh(U[:,x_t] + W.dot(s_t1_prev))
# Word embedding layer
x_e = self.E.dot(x_t)
# GRU Layer 1
z_t1 = sigmoid(self.U[0].dot(x_e) + self.W[0].dot(s_t1_prev) + self.b[0])
r_t1 = sigmoid(self.U[1].dot(x_e) + self.W[1].dot(s_t1_prev) + self.b[1])
c_t1 = np.tanh(self.U[2].dot(x_e) + self.W[2].dot(s_t1_prev * r_t1) + self.b[2])
s_t1 = (np.ones(z_t1.shape) - z_t1) * c_t1 + z_t1 * s_t1_prev
# GRU Layer 2
z_t2 = sigmoid(self.U[3].dot(s_t1) + self.W[3].dot(s_t2_prev) + self.b[3])
r_t2 = sigmoid(self.U[4].dot(s_t1) + self.W[4].dot(s_t2_prev) + self.b[4])
c_t2 = np.tanh(self.U[5].dot(s_t1) + self.W[5].dot(s_t2_prev * r_t2) + self.b[5])
s_t2 = (np.ones(z_t2.shape) - z_t2) * c_t2 + z_t2 * s_t2_prev
# Final output calculation
o_t = self.V.dot(s_t2) + self.c
return [o_t, s_t1, s_t2]
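# Minimal standalone sketch of a single GRU update with random toy weights (not
# the class's own E/U/W/b parameters), mirroring the z/r/c/s equations above:
import numpy as np

def _sig(v):
    return 1.0 / (1.0 + np.exp(-v))

d = 4
U_toy = np.random.randn(3, d, d) * 0.1
W_toy = np.random.randn(3, d, d) * 0.1
b_toy = np.zeros((3, d))
x_e_toy, s_prev_toy = np.random.randn(d), np.zeros(d)

z = _sig(U_toy[0].dot(x_e_toy) + W_toy[0].dot(s_prev_toy) + b_toy[0])
r = _sig(U_toy[1].dot(x_e_toy) + W_toy[1].dot(s_prev_toy) + b_toy[1])
c = np.tanh(U_toy[2].dot(x_e_toy) + W_toy[2].dot(s_prev_toy * r) + b_toy[2])
s = (1 - z) * c + z * s_prev_toy      # new hidden state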
def forward(self, inputs):
c_prev, x = inputs
a, i, f, o = _extract_gates(x)
if isinstance(x, numpy.ndarray):
self.a = numpy.tanh(a)
self.i = _sigmoid(i)
self.f = _sigmoid(f)
self.o = _sigmoid(o)
self.c = self.a * self.i + self.f * c_prev
h = self.o * numpy.tanh(self.c)
else:
self.c, h = cuda.elementwise(
'T c_prev, T a, T i_, T f, T o', 'T c, T h',
'''
COMMON_ROUTINE;
c = aa * ai + af * c_prev;
h = ao * tanh(c);
''',
'lstm_fwd', preamble=_preamble)(c_prev, a, i, f, o)
return self.c, h
def check_forward(self, c_prev_data, x_data):
c_prev = chainer.Variable(c_prev_data)
x = chainer.Variable(x_data)
c, h = functions.lstm(c_prev, x)
self.assertEqual(c.data.dtype, self.dtype)
self.assertEqual(h.data.dtype, self.dtype)
# Compute expected out
a_in = self.x[:, [0, 4]]
i_in = self.x[:, [1, 5]]
f_in = self.x[:, [2, 6]]
o_in = self.x[:, [3, 7]]
c_expect = _sigmoid(i_in) * numpy.tanh(a_in) + \
_sigmoid(f_in) * self.c_prev
h_expect = _sigmoid(o_in) * numpy.tanh(c_expect)
gradient_check.assert_allclose(
c_expect, c.data, **self.check_forward_options)
gradient_check.assert_allclose(
h_expect, h.data, **self.check_forward_options)
def test(self):
with self.test_session() as sess:
inp = tf.constant(np.array([
[[1.0], [2.0], [2.0], [0.0]]
], dtype=np.float32))
x = tdnn(inp, [2], [1])
result = sess.run(x, {
'TDNN/kernel_2/w:0': np.array([[[[1.0]], [[-1.0]]]]),
'TDNN/kernel_2/b:0': np.array([1.0]),
})
print(result)
self.assertAllClose(result, [[np.tanh(3.0)]])
def test_cnn_step(self):
with self.test_session() as sess:
m = self.model()
input_cnn = sess.run(m.input_cnn, {
'TDNN/kernel_2/w:0': np.array([[
[[1], [1], [1]],
[[1], [1], [1]],
]]),
'TDNN/kernel_2/b:0': np.array([0]),
m.input_embedded: np.array([[
[1,0,0], [0,0,1], [0,1,0], [0,0,0], [0,0,0],
]])
})
self.assertAllClose(input_cnn, np.array([
[[np.tanh(2)]],
]))
def bottom_data_is(self, x, s_prev=None, h_prev=None):
# if this is the first lstm node in the network
if s_prev is None:
s_prev = np.zeros_like(self.state.s)
if h_prev is None:
h_prev = np.zeros_like(self.state.h)
# save data for use in backprop
self.s_prev = s_prev
self.h_prev = h_prev
# concatenate x(t) and h(t-1)
xc = np.hstack((x, h_prev))
self.state.g = np.tanh(np.dot(self.param.wg, xc) + self.param.bg)
self.state.i = sigmoid(np.dot(self.param.wi, xc) + self.param.bi)
self.state.f = sigmoid(np.dot(self.param.wf, xc) + self.param.bf)
self.state.o = sigmoid(np.dot(self.param.wo, xc) + self.param.bo)
self.state.s = self.state.g * self.state.i + s_prev * self.state.f
self.state.h = self.state.s * self.state.o
self.x = x
self.xc = xc
def true_f(self, x):
return 1. * (1. + x) * np.sin(10. * np.tanh(x))
def repair_genotype(self, x, copy_if_changed=False):
"""make sure that solutions fit to the sample distribution.
This interface is versatile and likely to change.
In particular the frequency of ``x - self.mean`` being long in
Mahalanobis distance is limited, currently clipping at
``N**0.5 + 2 * N / (N + 2)`` is implemented.
"""
x = array(x, copy=False)
mold = array(self.mean, copy=False)
if 1 < 3: # hard clip at upper_length
upper_length = self.N**0.5 + 2 * self.N / (self.N + 2)
# should become an Option, but how? e.g. [0, 2, 2]
fac = self.mahalanobis_norm(x - mold) / upper_length
if fac > 1:
if copy_if_changed:
x = (x - mold) / fac + mold
else: # should be 25% faster:
x -= mold
x /= fac
x += mold
# print self.countiter, k, fac, self.mahalanobis_norm(pop[k] - mold)
# adapt also sigma: which are the trust-worthy/injected solutions?
elif 11 < 3:
return np.exp(np.tanh(((upper_length * fac)**2 / self.N - 1) / 2) / 2)
else:
if 'checktail' not in self.__dict__: # hasattr(self, 'checktail')
raise NotImplementedError
# from check_tail_smooth import CheckTail # for the time being
# self.checktail = CheckTail()
# print('untested feature checktail is on')
fac = self.checktail.addchin(self.mahalanobis_norm(x - mold))
if fac < 1:
x = fac * (x - mold) + mold
return x
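# Standalone sketch of the clipping rule documented above, for the special case of
# an identity covariance (so the Mahalanobis norm reduces to the Euclidean norm);
# an illustration only, not the cma implementation itself.
import numpy as np

def clip_to_upper_length(x, mean):
    N = len(x)
    upper_length = N ** 0.5 + 2.0 * N / (N + 2.0)
    fac = np.linalg.norm(np.asarray(x) - mean) / upper_length
    if fac > 1:
        return (np.asarray(x) - mean) / fac + mean
    return np.asarray(x)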