# Common imports assumed by the snippets below (others, e.g. theano or
# tensorflow, are noted where used):
from keras import backend as K
import numpy as np


def euclidean_distance(vects):
    """
    Calculate the Euclidean distance between two vectors.

    Parameters
    ----------
    vects : list
        List of the two vectors to compare.

    Returns
    -------
    euclidean_distance : float
        Euclidean distance between the two vectors.
    """
    eps = 1e-08
    x, y = vects
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), eps))
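A minimal usage sketch (the tensor names and the 128-dim embedding size are illustrative, not from the original): the function is typically wrapped in a Lambda layer to compare the two embedding branches of a Siamese network.

from keras.layers import Input, Lambda

# Two embedding branches to compare (hypothetical shapes).
emb_a = Input(shape=(128,))
emb_b = Input(shape=(128,))
# Lambda passes the list of tensors straight to euclidean_distance.
distance = Lambda(euclidean_distance,
                  output_shape=lambda shapes: (shapes[0][0], 1))([emb_a, emb_b])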
def gradient_penalty_loss(y_true, y_pred, averaged_samples, gradient_penalty_weight):
    """Calculates the gradient penalty loss for a batch of "averaged" samples.

    In Improved WGANs, the 1-Lipschitz constraint is enforced by adding a term to
    the loss function that penalizes the network if the gradient norm moves away
    from 1. However, it is impossible to evaluate this function at all points in
    the input space. The compromise used in the paper is to choose random points
    on the lines between real and generated samples, and check the gradients at
    these points. Note that it is the gradient w.r.t. the input averaged samples,
    not the weights of the discriminator, that we're penalizing!

    In order to evaluate the gradients, we must first run samples through the
    generator and evaluate the loss. Then we get the gradients of the
    discriminator w.r.t. the input averaged samples. The L2 norm and penalty can
    then be calculated for this gradient.

    Note that this loss function requires the original averaged samples as input,
    but Keras only supports passing y_true and y_pred to loss functions. To get
    around this, we make a partial() of the function with the averaged_samples
    argument, and use that for model training.
    """
    # K.gradients returns a list; take the single gradient tensor.
    gradients = K.gradients(K.sum(y_pred), averaged_samples)[0]
    # Per-sample L2 norm of the gradient (flatten all non-batch axes first).
    gradient_l2_norm = K.sqrt(K.sum(K.square(K.batch_flatten(gradients)), axis=1))
    gradient_penalty = gradient_penalty_weight * K.square(1 - gradient_l2_norm)
    return K.mean(gradient_penalty)
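The partial() trick the docstring mentions looks roughly like this (a sketch; averaged_samples would come from a random-weighted-average layer, and the weight of 10 is the value used in the paper):

from functools import partial

# averaged_samples = RandomWeightedAverage()([real_samples, generated_samples])
partial_gp_loss = partial(gradient_penalty_loss,
                          averaged_samples=averaged_samples,
                          gradient_penalty_weight=10)
partial_gp_loss.__name__ = 'gradient_penalty'  # Keras losses need a __name__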
from random import sample

def visualize(model, layer_name):
    # read_image, deprocess_image and write_image are helpers defined elsewhere.
    print('Model loaded.')
    layer_dict = dict([(layer.name, layer) for layer in model.layers])
    for filter_index in sample(range(0, layer_dict[layer_name].nb_filter), 10):
        layer_output = layer_dict[layer_name].output
        loss = K.mean(layer_output[:, filter_index, :, :])
        grads = K.gradients(loss, model.layers[0].input)[0]
        # Normalization trick: normalize the gradient to stabilize the ascent.
        grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)
        iterate = K.function([model.layers[0].input, K.learning_phase()], [loss, grads])
        input_img_data = np.asarray([read_image('visimage.jpg')])
        for _ in range(100):
            loss_value, grads_value = iterate([input_img_data, 0])
            input_img_data += grads_value * 3
        img = deprocess_image(input_img_data[0])
        write_image(img, '../activations/out{}.jpg'.format(filter_index))
def call(self, x, mask=None):
    # x should be an output and a target
    assert len(x) == 2
    losses = _per_sample_loss(self.loss, mask, x)
    if self.fast:
        grads = K.sqrt(sum([
            K.sum(K.square(g), axis=1)
            for g in K.gradients(losses, self.parameter_list)
        ]))
    else:
        nb_samples = K.shape(losses)[0]
        grads = K.map_fn(
            lambda i: self._grad_norm(losses[i]),
            K.arange(0, nb_samples),
            dtype=K.floatx()
        )
    return K.reshape(grads, (-1, 1))
def call(self, x, mask=None):
    conv_out = K.conv2d(x, self.W, strides=self.strides,
                        padding=self.padding,
                        data_format=self.data_format,
                        filter_shape=self.kernel_shape)
    if self.data_format == 'channels_first':
        # Complex-cell filter operation
        conv_out1 = K.sqrt(K.square(conv_out[:, :self.filters_complex, :, :]) + K.square(conv_out[:, self.filters_complex:2*self.filters_complex, :, :]) + K.epsilon())
        # Simple-cell filter operation
        conv_out2 = K.concatenate([conv_out1, conv_out[:, 2*self.filters_complex:, :, :]], axis=1)
    elif self.data_format == 'channels_last':
        # Complex-cell filter operation
        conv_out1 = K.sqrt(K.square(conv_out[:, :, :, :self.filters_complex]) + K.square(conv_out[:, :, :, self.filters_complex:2*self.filters_complex]) + K.epsilon())
        # Simple-cell filter operation
        conv_out2 = K.concatenate([conv_out1, conv_out[:, :, :, 2*self.filters_complex:]], axis=3)
    if self.bias:
        if self.data_format == 'channels_first':
            conv_out2 += K.reshape(self.b, (1, self.filters_complex + self.filters_simple, 1, 1))
        elif self.data_format == 'channels_last':
            conv_out2 += K.reshape(self.b, (1, 1, 1, self.filters_complex + self.filters_simple))
    return self.activation(conv_out2)
def call(self, inputs):
    stim = inputs[0]
    center = inputs[1]
    centers_x = self.XX[None, :, :, None] - center[:, 0, None, None, None] - self.centers[0][None, None, None, :]
    centers_y = self.YY[None, :, :, None] - center[:, 1, None, None, None] - self.centers[1][None, None, None, :]
    senv = self.stds[None, None, None, :]
    gauss = self.gauss_scale * (K.square(self.dx) / (2 * np.pi * K.square(senv) + K.epsilon())) * K.exp(-(K.square(centers_x) + K.square(centers_y)) / (2.0 * K.square(senv)))
    # gauss = (1 / K.sqrt(2 * np.pi * K.square(senv) + K.epsilon())) * K.exp(-(K.square(centers_x) + K.square(centers_y)) / (2.0 * K.square(senv)))
    # gauss /= K.max(gauss, axis=(1, 2), keepdims=True)
    gauss = K.reshape(gauss, self.kernel_shape)
    if K.backend() == 'theano':
        output = K.sum(stim[..., None] * K.pattern_broadcast(gauss, self.kernel_broadcast), axis=self.filter_axes, keepdims=False)
    else:
        output = K.sum(stim[..., None] * gauss, axis=self.filter_axes, keepdims=False)
    return output
def step(self, x, states):
    prev_output = states[0]
    B_U = states[1]
    B_W = states[2]
    if self.consume_less == 'cpu':
        h = x
    else:
        h = K.dot(x * B_W, self.W)
    if self.activation == 'soft_thresh':
        preactivation = h + K.dot(prev_output * B_U, self.Uaug)
        preactivation_abs = K.sqrt(self.epsilon + preactivation**2 + preactivation[:, self.swap_re_im]**2)
        rescale = K.maximum(preactivation_abs + self.baug, 0.) / (preactivation_abs + self.epsilon)
        output = preactivation * rescale
    else:
        raise NotImplementedError("Activation {} not implemented".format(self.activation))
    return output, [output]
def make_patches_grid(x, patch_size, patch_stride):
    '''Break image `x` up into a grid of patches.

    input shape: (channels, rows, cols)
    output shape: (rows, cols, channels, patch_rows, patch_cols)
    '''
    from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T
    x = K.expand_dims(x, 0)
    xs = K.shape(x)
    num_rows = 1 + (xs[-2] - patch_size) // patch_stride
    num_cols = 1 + (xs[-1] - patch_size) // patch_stride
    num_channels = xs[-3]
    patches = images2neibs(x,
                           (patch_size, patch_size), (patch_stride, patch_stride),
                           mode='valid')
    # neibs are sorted per-channel
    patches = K.reshape(patches, (num_channels, K.shape(patches)[0] // num_channels, patch_size, patch_size))
    patches = K.permute_dimensions(patches, (1, 0, 2, 3))
    # arrange in a 2d-grid (rows, cols, channels, px, py)
    patches = K.reshape(patches, (num_rows, num_cols, num_channels, patch_size, patch_size))
    patches_norm = K.sqrt(K.sum(K.square(patches), axis=(2, 3, 4), keepdims=True))
    return patches, patches_norm
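A hypothetical invocation (Theano backend only, since images2neibs is a Theano op; the shapes are illustrative):

img = K.placeholder(shape=(3, 256, 256))  # (channels, rows, cols)
patches, patches_norm = make_patches_grid(img, patch_size=3, patch_stride=1)
get_patches = K.function([img], [patches, patches_norm])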
# get tensor representations of our images
def sample_adaptive_normal_noise(inputs, **kwargs):
    from keras.backend import shape, random_normal, sqrt
    seed = kwargs.get('seed', 7)
    latent_dim = kwargs.get('latent_dim', 2)
    if isinstance(inputs, list):
        mu, sigma2 = inputs
        n_samples = kwargs.get('n_samples', shape(mu)[0])
        samples_isotropic = random_normal(shape=(n_samples, latent_dim),
                                          mean=0, stddev=1, seed=seed)
        samples = mu + sqrt(sigma2) * samples_isotropic
        return samples
    else:
        samples_isotropic = random_normal(shape=(shape(inputs)[0], latent_dim),
                                          mean=0, stddev=1, seed=seed)
        return samples_isotropic
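This is the VAE reparameterization trick, z = mu + sigma * eps with eps ~ N(0, I). A minimal usage sketch (assuming mu and sigma2 are tensors produced by an encoder; the names are illustrative):

from keras.layers import Lambda

z = Lambda(sample_adaptive_normal_noise,
           arguments={'latent_dim': 2, 'seed': 7})([mu, sigma2])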
import tensorflow as tf

def get_weightnorm_params_and_grads(p, g):
    ps = K.get_variable_shape(p)
    # construct weight scaler: V_scaler = g/||V||
    V_scaler_shape = (ps[-1],)  # assumes we're using tensorflow!
    V_scaler = K.ones(V_scaler_shape)  # init to ones, so effective parameters don't change
    # get V parameters = ||V||/g * W
    norm_axes = [i for i in range(len(ps) - 1)]
    V = p / tf.reshape(V_scaler, [1] * len(norm_axes) + [-1])
    # split V_scaler into ||V|| and g parameters
    V_norm = tf.sqrt(tf.reduce_sum(tf.square(V), norm_axes))
    g_param = V_scaler * V_norm
    # get grad in V,g parameters
    grad_g = tf.reduce_sum(g * V, norm_axes) / V_norm
    grad_V = tf.reshape(V_scaler, [1] * len(norm_axes) + [-1]) * \
        (g - tf.reshape(grad_g / V_norm, [1] * len(norm_axes) + [-1]) * V)
    return V, V_norm, V_scaler, g_param, grad_g, grad_V
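This implements the weight-normalization reparameterization of Salimans & Kingma (2016), w = g * V / ||V||: the direction V and the scale g are optimized as separate parameters, and the helper above converts a plain weight tensor p and its gradient g into the corresponding V and g parameters and their gradients.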
def call(self, x, mask=None):
    def image_expand(tensor):
        return K.expand_dims(K.expand_dims(tensor, -1), -1)

    def batch_image_expand(tensor):
        return image_expand(K.expand_dims(tensor, 0))

    hw = K.cast(x.shape[2] * x.shape[3], K.floatx())
    mu = K.sum(x, [-1, -2]) / hw
    mu_vec = image_expand(mu)
    sig2 = K.sum(K.square(x - mu_vec), [-1, -2]) / hw
    y = (x - mu_vec) / (K.sqrt(image_expand(sig2)) + K.epsilon())
    scale = batch_image_expand(self.scale)
    shift = batch_image_expand(self.shift)
    return scale * y + shift
    # else:
    #     raise NotImplemented("Please complete `CycGAN/layers/padding.py` to run on backend {}.".format(K.backend()))
def semantic_matrix(argv):
    assert len(argv) == 2
    q = argv[0]
    a = argv[1]
    q_sqrt = K.sqrt((q ** 2).sum(axis=2, keepdims=True))
    a_sqrt = K.sqrt((a ** 2).sum(axis=2, keepdims=True))
    denominator = K.batch_dot(q_sqrt, K.permute_dimensions(a_sqrt, [0, 2, 1]))
    return K.batch_dot(q, K.permute_dimensions(a, [0, 2, 1])) / (denominator + SAFE_EPSILON)
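In effect this builds a pairwise cosine-similarity matrix: for q of shape (batch, len_q, dim) and a of shape (batch, len_a, dim), the result has shape (batch, len_q, len_a), with the module-level SAFE_EPSILON constant guarding against division by zero. (The `.sum(...)` method calls on tensors mean this snippet assumes the Theano backend.)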
# (The original Chinese comments here were garbled in extraction. They discuss
# selecting entries by batch index, taking argmax along axis 2 - axis, and how
# Theano treats boolean masks, e.g. zeroing negatives via
# T.set_subtensor(ib[(ib < 0).nonzero()], 0).)
# See https://groups.google.com/forum/#!topic/theano-users/7gUdN6E00Dc
def iterate_softmax(model, neuron):
    input_tensor = model.input
    # this is a placeholder tensor that will contain our generated images
    # build a loss function that maximizes the activation
    # of the nth filter of the layer considered
    print('X shape', model.output[:, neuron])
    x = model.output
    loss_weight_continuity = 0.0
    loss_weight_activity = 1.0
    loss = K.mean(x)
    # loss += loss_weight_continuity * total_variation_norm(input_tensor)
    # compute the gradient of the input picture wrt this loss
    grads = K.gradients(loss, input_tensor)[0]
    # normalization trick: we normalize the gradient
    grads /= (K.sqrt(K.mean(K.square(grads))) + 1e-5)
    # this function returns the loss and grads given the input picture
    return K.function([input_tensor], [loss, grads])
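A hypothetical gradient-ascent loop using the returned function (step size and iteration count are illustrative; models with learning-phase layers may also need the learning-phase flag passed in):

iterate = iterate_softmax(model, neuron=0)
x = np.random.random((1,) + model.input_shape[1:]).astype('float32')
for _ in range(20):
    loss_value, grads_value = iterate([x])
    x += grads_value * 0.1  # small ascent step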
def custom_for_keras(self, ALL_word_embeds):
    ## only the top 20 rows from word_vectors is legit!
    def top_accuracy(true_word_indices, image_vectors):
        l2 = lambda x, axis: K.sqrt(K.sum(K.square(x), axis=axis, keepdims=True))
        l2norm = lambda x, axis: x / l2(x, axis)
        l2_words = l2norm(ALL_word_embeds, axis=1)
        l2_images = l2norm(image_vectors, axis=1)
        tiled_words = K.tile(K.expand_dims(l2_words, axis=1), (1, 200, 1))
        tiled_images = K.tile(K.expand_dims(l2_images, axis=1), (1, 20, 1))
        diff = K.squeeze(l2(l2_words - l2_images, axis=2), axis=2)  # K.squeeze needs an explicit axis
        # slice_top3 = lambda x: x[:, 0:3]
        # slice_top1 = lambda x: x[:, 0:1]
        diff_top5 = metrics.top_k_categorical_accuracy(tiled_images, diff)
        return diff_top5
    return top_accuracy
def build_mdl(len_words, embed_dim, embeds, len_sent1, len_sent2):
    embeds.insert(0, np.zeros(embeds[0].shape, dtype='float32'))  # for padding
    input_q = Input(shape=(len_sent1,), dtype='int32')
    input_a = Input(shape=(len_sent2,), dtype='int32')
    embed = Embedding(mask_zero=True, input_dim=len_words + 1, output_dim=embed_dim,
                      weights=[np.array(embeds)], dropout=0.2)
    x_q = embed(input_q)
    x_a = embed(input_a)
    rnn_q = LSTM(64, input_dim=embed_dim, return_sequences=False, input_length=len_sent1)(x_q)
    rnn_a = LSTM(64, input_dim=embed_dim, return_sequences=False, input_length=len_sent2)(x_a)
    dense_q = Dense(32)(rnn_q)
    dense_a = Dense(32)(rnn_a)

    def cosine(x):
        axis = len(x[0]._keras_shape) - 1
        dot = lambda a, b: K.batch_dot(a, b, axes=axis)
        return dot(x[0], x[1]) / K.sqrt(dot(x[0], x[0]) * dot(x[1], x[1]))

    # https://github.com/fchollet/keras/issues/2299
    cosine_sim = merge([dense_q, dense_a], mode=cosine, output_shape=(1,))
    model = Model(input=[input_q, input_a], output=[cosine_sim])
    model.compile(optimizer='rmsprop', loss='mse')
    return model
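Note that merge with a callable mode, Embedding(..., dropout=...), and Model(input=..., output=...) are Keras 1 APIs. In Keras 2 the cosine merge would be expressed with a Lambda layer instead, roughly:

from keras.layers import Lambda

# Keras 2 equivalent of the cosine merge above (a sketch):
cosine_sim = Lambda(cosine, output_shape=(1,))([dense_q, dense_a])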
def ori_loss(y_true, y_pred, lamb=1.):
    # clip
    y_pred = K.tf.clip_by_value(y_pred, K.epsilon(), 1 - K.epsilon())
    # get ROI
    label_seg = K.sum(y_true, axis=-1, keepdims=True)
    label_seg = K.tf.cast(K.tf.greater(label_seg, 0), K.tf.float32)
    # weighted cross entropy loss
    lamb_pos, lamb_neg = 1., 1.
    logloss = lamb_pos * y_true * K.log(y_pred) + lamb_neg * (1 - y_true) * K.log(1 - y_pred)
    logloss = logloss * label_seg  # apply ROI
    logloss = -K.sum(logloss) / (K.sum(label_seg) + K.epsilon())
    # coherence loss: nearby orientation predictions should be consistent
    mean_kernal = np.reshape(np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]], dtype=np.float32) / 8, [3, 3, 1, 1])
    sin2angle_ori, cos2angle_ori, modulus_ori = ori2angle(y_pred)
    sin2angle = K.conv2d(sin2angle_ori, mean_kernal, padding='same')
    cos2angle = K.conv2d(cos2angle_ori, mean_kernal, padding='same')
    modulus = K.conv2d(modulus_ori, mean_kernal, padding='same')
    coherence = K.sqrt(K.square(sin2angle) + K.square(cos2angle)) / (modulus + K.epsilon())
    coherenceloss = K.sum(label_seg) / (K.sum(coherence * label_seg) + K.epsilon()) - 1
    loss = logloss + lamb * coherenceloss
    return loss
def get_gradients(self, loss, params):
    '''
    Replacement for the default keras get_gradients() function.
    Modification: checks if the object has the attribute grads and
    returns that rather than calculating the gradients using automatic
    differentiation.
    '''
    if hasattr(self, 'grads'):
        grads = self.grads
    else:
        grads = K.gradients(loss, params)
    if hasattr(self, 'clipnorm') and self.clipnorm > 0:
        norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads]))
        grads = [clip_norm(g, self.clipnorm, norm) for g in grads]
    if hasattr(self, 'clipvalue') and self.clipvalue > 0:
        grads = [K.clip(g, -self.clipvalue, self.clipvalue) for g in grads]
    return grads
def _get_output_functions(self):
    # if you name your layers you can use model.get_layer('recurrent_layer')
    model = self.tweet_classifier
    recurrent_layer = model.layers[2]
    attention_layer = model.layers[5]
    merged_layer = model.layers[9]
    output_layer = model.layers[10]
    layers = [recurrent_layer, attention_layer, merged_layer, output_layer]
    outputs = []
    for l in layers:
        outputs.append(l.output)
        loss = K.mean(model.output)
        # K.gradients returns a list; take the single gradient tensor.
        grads = K.gradients(loss, l.output)[0]
        grads_norm = grads / (K.sqrt(K.mean(K.square(grads))) + 1e-5)
        outputs.append(grads_norm)
    all_function = K.function([model.layers[0].input, K.learning_phase()],
                              outputs)
    return all_function
def call(self, x, mask=None):
    ax = 1 if self.is_q else 2

    def _step(v1, v2):
        cosine_score = T.tensordot(v1 / T.sqrt(T.sum(T.sqr(v1), axis=2, keepdims=True) + 1e-6),
                                   v2 / T.sqrt(T.sum(T.sqr(v2), axis=ax, keepdims=True) + 1e-6),
                                   [[2], [ax]])
        return cosine_score

    l_s = x[0]  # n_b x n_s x n_w_s x D
    l_a = x[1]  # n_b x 4 x n_w_qa x D
    # w_qa = self.layers[2].get_output(train)  # n_b x 4 x n_w_qa x 1
    # w_qa = T.addbroadcast(w_qa, len(self.layers[2].output_shape) - 1)
    # get cosine similarity for ALL word pairs
    output, _ = theano.scan(_step, sequences=[l_s, l_a], outputs_info=None)
    if not self.is_q:
        output = output.dimshuffle(0, 1, 3, 2, 4)  # n_b x n_s x 4 x n_w_s x n_w_qa
    return output
def call(self, x, mask=None):
    l_q = x[0]  # n_b x n_s
    l_a = x[1]  # n_b x n_s x 4
    # add broadcast dimension to end of l_q
    l_q = l_q.dimshuffle(0, 1, 'x')
    if self.mean_type == 'harmonic':
        # compute harmonic mean of two scores
        output = 2. * l_q * l_a / (l_q + l_a + 0.00001) * self.beta
    elif self.mean_type == 'geometric':
        # compute geometric mean of two scores
        output = T.sqrt(l_q * l_a + 0.00001) * self.beta
    elif self.mean_type == 'bilinear':
        output = l_q * l_a * self.alpha + self.beta * l_a + self.gama * l_q
    else:
        # compute arithmetic mean
        output = (l_q + l_a) / 2.
    return output + 0 * (self.alpha + self.beta + self.gama)
def _cosine_similarity(self, x1, x2):
    """Compute cosine similarity.

    # Arguments:
        x1: (..., embedding_size)
        x2: (..., embedding_size)
    """
    cos = K.sum(x1 * x2, axis=-1)
    x1_norm = K.sqrt(K.maximum(K.sum(K.square(x1), axis=-1), self.epsilon))
    x2_norm = K.sqrt(K.maximum(K.sum(K.square(x2), axis=-1), self.epsilon))
    cos = cos / x1_norm / x2_norm
    return cos
def LN(x, gamma, beta, epsilon=1e-6, axis=-1):
    m = K.mean(x, axis=axis, keepdims=True)
    std = K.sqrt(K.var(x, axis=axis, keepdims=True) + epsilon)
    x_normed = (x - m) / (std + epsilon)
    x_normed = gamma * x_normed + beta
    return x_normed
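A minimal usage sketch of this layer-normalization helper (the 64-unit feature size and constant gamma/beta are illustrative; in a real layer they would be trainable weights):

gamma = K.ones((64,))
beta = K.zeros((64,))
x = K.placeholder(shape=(None, 64))
y = LN(x, gamma, beta)  # normalizes each sample over its last axis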
def call(self, inputs):
    kernel = self.kernel * self.g / K.sqrt(K.sum(K.square(self.kernel), axis=0))
    output = K.dot(inputs, kernel)
    if self.use_bias:
        output = K.bias_add(output, self.bias)
    if self.activation is not None:
        output = self.activation(output)
    return output
def call(self, x):
    kernel = self.kernel * self.g / K.sqrt(K.sum(K.square(self.kernel), axis=[0, 1, 2], keepdims=True))
    output = K.conv2d(x, kernel, strides=self.strides,
                      padding=self.padding,
                      data_format=self.data_format)
    if self.use_bias:
        output = K.bias_add(output, self.bias, data_format=self.data_format)
    if self.activation is not None:
        output = self.activation(output)
    return output
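The two call methods above implement weight normalization inline: the raw kernel is rescaled to g * kernel / ||kernel||, with the norm taken over every axis except the output-channel axis (axis 0 for the dense kernel; axes [0, 1, 2] for the conv kernel), so the trainable scale g alone controls the magnitude of each output feature.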