def listmle(x, t):
"""
    The ListMLE loss as in Xia et al. (2008), Listwise Approach to Learning to
Rank - Theory and Algorithm.
:param x: The activation of the previous layer
:param t: The target labels
:return: The loss
"""
    # Sort the activations into ground-truth order using the relevance labels
xp = cuda.get_array_module(t)
t_hat = t[:, 0]
x_hat = x[xp.flip(xp.argsort(t_hat), axis=0)]
# Compute MLE loss
final = logcumsumexp(x_hat)
return F.sum(final - x_hat)
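# `logcumsumexp` is defined elsewhere in this module; a minimal numpy sketch
# of what ListMLE assumes it computes: entry i holds log sum_{j>=i} exp(x_j),
# i.e. a reverse cumulative log-sum-exp with the usual max-subtraction trick.
import numpy as np

def logcumsumexp_np(x):
    x = np.asarray(x, dtype=np.float64)
    m = x.max()  # subtract the max so exp() cannot overflow
    rev_cumsum = np.cumsum(np.exp(x[::-1] - m))[::-1]
    return np.log(rev_cumsum) + m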
def listpl(x, t, α=15.0):
"""
The ListPL loss, a stochastic variant of ListMLE that in expectation
approximates the true ListNet loss.
:param x: The activation of the previous layer
:param t: The target labels
    :param α: The smoothing factor
:return: The loss
"""
# Sample permutation from PL(t)
    index = _pl_sample(t, α)
x = x[index]
# Compute MLE loss
final = logcumsumexp(x)
return F.sum(final - x)
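# `_pl_sample` is not shown here. A hypothetical sketch of one standard way
# to draw a permutation from the Plackett-Luce distribution over scores t/α:
# perturb the scores with Gumbel noise and argsort (the Gumbel trick). The
# author's actual sampler may differ.
import numpy as np

def _pl_sample_np(t, alpha=15.0):
    scores = np.asarray(t, dtype=np.float64).ravel() / alpha
    gumbel = -np.log(-np.log(np.random.uniform(size=scores.shape)))
    return np.argsort(-(scores + gumbel))  # permutation indices, best first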
def __call__(self, x):
h = x
for l in self.conv_layers:
h = self.activation(l(h))
# Advantage
batch_size = x.shape[0]
ya = self.a_stream(h)
mean = F.reshape(
F.sum(ya, axis=1) / self.n_actions, (batch_size, 1))
ya, mean = F.broadcast(ya, mean)
ya -= mean
# State value
ys = self.v_stream(h)
ya, ys = F.broadcast(ya, ys)
q = ya + ys
return action_value.DiscreteActionValue(q)
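# The advantage/value split above is the dueling aggregation
# Q(s, a) = V(s) + A(s, a) - mean_a A(s, a). A quick numpy check:
import numpy as np

ya = np.array([[1.0, 2.0, 3.0]])  # advantages for 3 actions
ys = np.array([[0.5]])            # state value
q = ya - ya.mean(axis=1, keepdims=True) + ys
print(q)  # [[-0.5  0.5  1.5]] -- the mean over actions equals V(s) = 0.5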
def compute_policy_gradient_full_correction(
action_distrib, action_distrib_mu, action_value, v,
truncation_threshold):
"""Compute off-policy bias correction term wrt all actions."""
assert truncation_threshold is not None
assert np.isscalar(v)
with chainer.no_backprop_mode():
rho_all_inv = compute_full_importance(action_distrib_mu,
action_distrib)
correction_weight = (
np.maximum(1 - truncation_threshold * rho_all_inv,
np.zeros_like(rho_all_inv)) *
action_distrib.all_prob.data[0])
correction_advantage = action_value.q_values.data[0] - v
return -F.sum(correction_weight *
action_distrib.all_log_prob *
correction_advantage, axis=1)
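# Numerical illustration of ACER's bias-correction weight [1 - c / rho]_+,
# where rho = pi/mu is the importance ratio (rho_all_inv above is assumed to
# be its elementwise inverse, mu/pi): only actions the behavior policy
# under-samples relative to pi get a non-zero correction.
import numpy as np

c = 10.0                          # truncation threshold
rho = np.array([0.5, 5.0, 50.0])  # pi/mu for three actions
print(np.maximum(1 - c / rho, 0.0))  # [0.  0.  0.8]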
def setUp(self):
def evaluator(actions):
        # negative squared norm of the actions
return -F.sum(actions ** 2, axis=1)
self.evaluator = evaluator
if self.has_maximizer:
def maximizer():
return chainer.Variable(np.zeros(
(self.batch_size, self.action_size), dtype=np.float32))
else:
maximizer = None
self.maximizer = maximizer
self.av = action_value.SingleActionValue(
evaluator=evaluator, maximizer=maximizer)
def __call__(self, x, t, train=True, finetune=False):
h = x
h = F.dropout(h, ratio=0.2, train=train)
h = self.l1(h, train, finetune)
h = self.l2(h, train, finetune)
h = self.l3(h, train, finetune)
h = F.dropout(h, ratio=0.5, train=train)
h = self.l4(h, train, finetune)
h = self.l5(h, train, finetune)
h = self.l6(h, train, finetune)
h = F.dropout(h, ratio=0.5, train=train)
h = self.l7(h, train, finetune)
h = self.l8(h, train, finetune)
h = self.l9(h, train, finetune)
h = F.sum(h, axis=-1)
h = F.sum(h, axis=-1)
h = F.sum(h, axis=-1)
h /= 8 * 8 * 8
return F.softmax_cross_entropy(h, t), F.accuracy(h, t)
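# The three trailing F.sum calls plus the division implement a global average
# over the last three axes; given the 8 * 8 * 8 divisor, these are presumably
# a group axis of size 8 plus an 8x8 spatial grid (an assumption about the
# layout; the two variants below divide by 8 * 8 * 4 and 8 * 8 for their
# respective group sizes).
import numpy as np

h = np.random.randn(2, 10, 8, 8, 8)  # (batch, classes, group, height, width)
pooled = h.sum(-1).sum(-1).sum(-1) / (8 * 8 * 8)
assert np.allclose(pooled, h.mean(axis=(-3, -2, -1)))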
def __call__(self, x, t, train=True, finetune=False):
h = x
h = F.dropout(h, ratio=0.2, train=train)
h = self.l1(h, train, finetune)
h = self.l2(h, train, finetune)
h = self.l3(h, train, finetune)
h = F.dropout(h, ratio=0.5, train=train)
h = self.l4(h, train, finetune)
h = self.l5(h, train, finetune)
h = self.l6(h, train, finetune)
h = F.dropout(h, ratio=0.5, train=train)
h = self.l7(h, train, finetune)
h = self.l8(h, train, finetune)
h = self.l9(h, train, finetune)
h = F.sum(h, axis=-1)
h = F.sum(h, axis=-1)
h = F.sum(h, axis=-1)
h /= 8 * 8 * 4
return F.softmax_cross_entropy(h, t), F.accuracy(h, t)
def __call__(self, x, t, train=True, finetune=False):
h = x
h = F.dropout(h, ratio=0.2, train=train)
h = self.l1(h, train, finetune)
h = self.l2(h, train, finetune)
h = self.l3(h, train, finetune)
h = F.dropout(h, ratio=0.5, train=train)
h = self.l4(h, train, finetune)
h = self.l5(h, train, finetune)
h = self.l6(h, train, finetune)
h = F.dropout(h, ratio=0.5, train=train)
h = self.l7(h, train, finetune)
h = self.l8(h, train, finetune)
h = self.l9(h, train, finetune)
h = F.sum(h, axis=-1)
h = F.sum(h, axis=-1)
h /= 8 * 8
return F.softmax_cross_entropy(h, t), F.accuracy(h, t)
def __call__(self, ht, xs, d_bar_s_1):
    # ht: list of encoder hidden states,
    # each of shape batch_size * n_words * in_size
    # xs: input word sequences
    if d_bar_s_1 is None:
        d_bar_s_1 = np.zeros(self.in_size)
ht_T = list(map(F.transpose, ht))
phi_ht = list(map(W1, ht_T))
d_s = rnn(d_bar_s_1, y_s_1)
phi_d = F.transpose_sequence(W2(F.transpose_sequence(d_s)))
u_st = list(map(lambda x: phi_d*x, phi_ht)) #(4)
sum_u = F.sum(u_st)
alpha_st = list(map(lambda x:x/sum_u, u_st)) #(3)
z_s = F.argmax(alpha_st, axis=0)
c_s = F.sum(list(map(lambda x,y:x*y , alpha_st, ht))) #(2)
d_bar_s = F.relu(W3(F.concat([c_s, d_s])))
return d_bar_s, d_s, c_s, z_s
def nearest_neighbor_patch(x, patch, patch_norm):
assert patch.data.shape[0] == 1, 'mini batch size of patch must be 1'
assert patch_norm.data.shape[0] == 1, 'mini batch size of patch_norm must be 1'
xp = cuda.get_array_module(x.data)
z = x.data
b, ch, h, w = z.shape
z = z.transpose((1, 0, 2, 3)).reshape((ch, -1))
norm = xp.expand_dims(xp.sum(z ** 2, axis=0) ** 0.5, 0)
z = z / xp.broadcast_to(norm, z.shape)
p = patch.data
p_norm = patch_norm.data
p = p.reshape((ch, -1))
p_norm = p_norm.reshape((1, -1))
p_normalized = p / xp.broadcast_to(p_norm, p.shape)
correlation = z.T.dot(p_normalized)
    max_index = xp.argmax(correlation, axis=1)
    nearest_neighbor = p.take(max_index, axis=1).reshape((ch, b, h, w)).transpose((1, 0, 2, 3))
return Variable(nearest_neighbor)
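# The same matching in plain numpy: L2-normalize both sets of column vectors,
# take the correlation (cosine similarity) matrix, and keep the best match
# per location.
import numpy as np

z = np.random.randn(64, 100)  # (ch, b*h*w) activations
p = np.random.randn(64, 30)   # (ch, n_patches) style patches
z_n = z / np.linalg.norm(z, axis=0, keepdims=True)
p_n = p / np.linalg.norm(p, axis=0, keepdims=True)
best = z_n.T.dot(p_n).argmax(axis=1)  # index of the nearest patch per location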
def _sample_discrete_actions(batch_probs):
"""Sample a batch of actions from a batch of action probabilities.
Args:
batch_probs (ndarray): batch of action probabilities BxA
Returns:
List consisting of sampled actions
"""
action_indices = []
    # Subtract a tiny value from the probabilities to avoid
    # "ValueError: sum(pvals[:-1]) > 1.0" in np.random.multinomial
batch_probs = batch_probs - np.finfo(np.float32).epsneg
for i in range(batch_probs.shape[0]):
histogram = np.random.multinomial(1, batch_probs[i])
action_indices.append(int(np.nonzero(histogram)[0]))
return action_indices
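# Usage sketch: each row of probabilities should sum to (at most) 1.
import numpy as np

probs = np.array([[0.2, 0.5, 0.3],
                  [0.7, 0.2, 0.1]], dtype=np.float32)
print(_sample_discrete_actions(probs))  # e.g. [1, 0]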
def __call__(self, ws, cs, ls, ts):
    h_w = self.emb_word(ws)  # (batchsize, windowsize, word_dim)
h_c = self.emb_char(cs) # (batchsize, windowsize, max_char_len, char_dim)
batchsize, windowsize, _, _ = h_c.data.shape
# (batchsize, windowsize, char_dim)
h_c = F.sum(h_c, 2)
h_c, ls = F.broadcast(h_c, F.reshape(ls, (batchsize, windowsize, 1)))
h_c = h_c / ls
h = F.concat([h_w, h_c], 2)
h = F.reshape(h, (batchsize, -1))
# ys = self.linear1(h)
h = F.relu(self.linear1(h))
h = F.dropout(h, ratio=.5, train=self.train)
ys = self.linear2(h)
loss = F.softmax_cross_entropy(ys, ts)
acc = F.accuracy(ys, ts)
chainer.report({
"loss": loss,
"accuracy": acc
}, self)
return loss
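# The character branch averages embeddings over characters: sum over the
# max_char_len axis, then divide by the true character counts ls. In numpy:
import numpy as np

h_c = np.random.randn(2, 3, 7, 4)            # (batch, window, max_chars, dim)
ls = np.array([[5., 7., 2.], [3., 4., 6.]])  # characters per word
avg = h_c.sum(axis=2) / ls[..., None]        # (batch, window, dim)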
def meanQvalue(Q, samples):
xp = Q.xp
    s = np.empty((minibatch_size, STATE_LENGTH, FRAME_WIDTH, FRAME_HEIGHT), dtype=np.float32)
    a = np.asarray([sample[1] for sample in samples], dtype=np.int32)
    for i in range(minibatch_size):
s[i] = samples[i][0]
# to gpu if available
s = xp.asarray(s)
a = xp.asarray(a)
# Prediction: Q(s,a)
y = F.select_item(Q(s), a)
mean_Q = (F.sum(y)/minibatch_size).data
return mean_Q
def mean_feature(net, paths, image_size, base_feature, top_num, batch_size, clip_rect=None):
xp = net.xp
image_num = len(paths)
features = []
for i in six.moves.range(0, image_num, batch_size):
        x = [preprocess_image(Image.open(path).convert('RGB'), image_size, clip_rect)
             for path in paths[i:i + batch_size]]
x = xp.asarray(np.concatenate(x, axis=0))
y = feature(net, x)
features.append([cuda.to_cpu(layer.data) for layer in y])
if image_num > top_num:
last_features = np.concatenate([f[-1] for f in features], axis=0)
last_features = last_features.reshape((last_features.shape[0], -1))
base_feature = cuda.to_cpu(base_feature).reshape((1, -1,))
diff = np.sum((last_features - base_feature) ** 2, axis=1)
nearest_indices = np.argsort(diff)[:top_num]
nearests = [np.concatenate(xs, axis=0)[nearest_indices] for xs in zip(*features)]
else:
nearests = [np.concatenate(xs, axis=0) for xs in zip(*features)]
return [xp.asarray(np.mean(f, axis=0, keepdims=True)) for f in nearests]
def normalized_mutual_info_score(x, y):
xp = chainer.cuda.get_array_module(x)
contingency = contingency_matrix(x, y)
nonzero_mask = contingency != 0
nonzero_val = contingency[nonzero_mask]
pi = contingency.sum(axis=1, keepdims=True)
pj = contingency.sum(axis=0, keepdims=True)
total_mass = pj.sum()
pi /= total_mass
pj /= total_mass
pi_pj = (pj * pi)[nonzero_mask]
pij = nonzero_val / total_mass
log_pij = xp.log(pij)
log_pi_pj = xp.log(pi_pj)
mi = xp.sum(pij * (log_pij - log_pi_pj))
nmi = mi / max(xp.sqrt(entropy(pi) * entropy(pj)), 1e-10)
return xp.clip(nmi, 0, 1)
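# Hedged numpy sketches of the two helpers this function assumes, under the
# usual definitions: a label contingency table and the Shannon entropy (in
# nats, matching the xp.log above) of a normalized distribution.
import numpy as np

def contingency_matrix(x, y):
    xs, xi = np.unique(x, return_inverse=True)
    ys, yi = np.unique(y, return_inverse=True)
    c = np.zeros((len(xs), len(ys)))
    np.add.at(c, (xi, yi), 1.0)  # count co-occurrences of (x, y) labels
    return c

def entropy(p):
    p = np.asarray(p).ravel()
    p = p[p > 0]
    return float(-np.sum(p * np.log(p)))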
def __call__(self, x, z, ze, mask, conv_mask):
att_scale = self.xp.sum(
mask, axis=2, keepdims=True)[:, None, :, :] ** 0.5
pad = self.xp.zeros(
(x.shape[0], x.shape[1], self.width - 1, 1), dtype=x.dtype)
base_x = x
z = F.squeeze(z, axis=3)
    # Note: the behavior of the input, output, and attention here follows
    # the authors' code, which differs slightly from what the paper
    # describes.
for conv_name, preatt_name in zip(self.conv_names, self.preatt_names):
# Calculate Output of GLU
out = getattr(self, conv_name)(
F.concat([pad, x], axis=2), conv_mask)
        # Calculate attention output from the GLU output
preatt = seq_linear(getattr(self, preatt_name), out)
query = base_x + preatt
query = F.squeeze(query, axis=3)
c = self.attend(query, z, ze, mask) * att_scale
        # Merge them in the residual calculation and scaling
x = (x + (c + out) * scale05) * scale05
return x
def attend(self, query, key, value, mask, minfs=None):
"""
Input shapes:
q=(b, units, dec_l), k=(b, units, enc_l),
v=(b, units, dec_l, enc_l), m=(b, dec_l, enc_l)
"""
# Calculate Attention Scores with Mask for Zero-padded Areas
pre_a = F.batch_matmul(query, key, transa=True) # (b, dec_l, enc_l)
minfs = self.xp.full(pre_a.shape, -np.inf, pre_a.dtype) \
if minfs is None else minfs
pre_a = F.where(mask, pre_a, minfs)
a = F.softmax(pre_a, axis=2)
    # If every value along axis=2 is -inf, softmax yields NaN, so re-mask.
a = F.where(self.xp.isnan(a.data),
self.xp.zeros(a.shape, dtype=a.dtype), a)
reshaped_a = a[:, None] # (b, 1, dec_xl, enc_l)
# Calculate Weighted Sum
pre_c = F.broadcast_to(reshaped_a, value.shape) * value
c = F.sum(pre_c, axis=3, keepdims=True) # (b, units, dec_xl, 1)
return c
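# Why the re-mask after the softmax is needed: with a fully masked row the
# max-subtraction inside softmax computes -inf - (-inf) = nan, so every
# entry of that row becomes NaN and must be zeroed.
import numpy as np

row = np.array([-np.inf, -np.inf])
e = np.exp(row - row.max())  # nan: -inf - (-inf)
a = e / e.sum()              # all NaN
print(np.nan_to_num(a))      # [0. 0.]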
def avg_pool_max_pool(self, hs):
num_output = len(hs[0])
houts = []
i = 0
shape = hs[0][i].shape
    h = F.dstack([F.reshape(h[i], (shape[0], -1)) for h in hs])
    x = 1.0 * F.sum(h, 2) / h.shape[2]
x = F.reshape(x, shape)
houts.append(x)
for i in range(1,num_output):
shape = hs[0][i].shape
        h = F.dstack([F.reshape(h[i], (shape[0], -1)) for h in hs])
        x = 1.0 * F.max(h, 2)
x = F.reshape(x, shape)
houts.append(x)
return houts
def max_pool_avg_pool(self, hs):
num_output = len(hs[0])
houts = []
i = 0
shape = hs[0][i].shape
    h = F.dstack([F.reshape(h[i], (shape[0], -1)) for h in hs])
    x = 1.0 * F.max(h, 2)
x = F.reshape(x, shape)
houts.append(x)
for i in range(1,num_output):
shape = hs[0][i].shape
        h = F.dstack([F.reshape(h[i], (shape[0], -1)) for h in hs])
        x = 1.0 * F.sum(h, 2) / h.shape[2]
x = F.reshape(x, shape)
houts.append(x)
return houts
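# In both mixers the sum/size form is just a mean over the stacked
# hypotheses. A quick numpy check:
import numpy as np

h = np.random.randn(4, 10, 3)  # 3 stacked hypotheses along axis 2
assert np.allclose(1.0 * h.sum(2) / h.shape[2], h.mean(2))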
def term_feat(self, iloc, jloc, ival, jval, bs, nf, train=True):
# Change all of the shapes to form interaction vectors
shape = (bs, nf * 2, self.n_dim)
feat_mu_vec = F.broadcast_to(self.feat_mu_vec.b, shape)
feat_lv_vec = F.broadcast_to(self.feat_lv_vec.b, shape)
if not train:
feat_lv_vec += self.lv_floor
# Construct the interaction mean and variance
# iloc is (bs, nf), feat(iloc) is (bs, nf, ndim) and
# dot(feat, feat) is (bs, nf)
ivec = F.gaussian(feat_mu_vec + self.feat_delta_mu(iloc),
feat_lv_vec + self.feat_delta_lv(iloc))
jvec = F.gaussian(feat_mu_vec + self.feat_delta_mu(jloc),
feat_lv_vec + self.feat_delta_lv(jloc))
# feat is (bs, )
feat = dot(F.sum(ivec * jvec, axis=2), ival * jval)
# Compute the KLD for the group mean vector and variance vector
kld1 = F.gaussian_kl_divergence(self.feat_mu_vec.b, self.feat_lv_vec.b)
# Compute the KLD for vector deviations from the group mean and var
kld2 = F.gaussian_kl_divergence(self.feat_delta_mu.W,
self.feat_delta_lv.W)
return feat, kld1 + kld2
def __call__(self, loc, val, y, train=True):
bs = val.data.shape[0]
pred, kld0, kld1, kld2 = self.forward(loc, val, y, train=train)
# Compute MSE loss
mse = F.mean_squared_error(pred, y)
rmse = F.sqrt(mse) # Only used for reporting
# Now compute the total KLD loss
kldt = kld0 * self.lambda0 + kld1 * self.lambda1 + kld2 * self.lambda2
# Total loss is MSE plus regularization losses
loss = mse + kldt * (1.0 / self.total_nobs)
# Log the errors
logs = {'loss': loss, 'rmse': rmse, 'kld0': kld0, 'kld1': kld1,
'kld2': kld2, 'kldt': kldt, 'bias': F.sum(self.bias_mu.b)}
reporter.report(logs, self)
return loss
def __call__(self, loc, val, y, train=True):
bs = val.data.shape[0]
ret = self.forward(loc, val, y, train=train)
pred, kld0, kld1, kldg, kldi, hypg, hypi = ret
# Compute MSE loss
mse = F.mean_squared_error(pred, y)
rmse = F.sqrt(mse) # Only used for reporting
# Now compute the total KLD loss
kldt = kld0 * self.lambda0 + kld1 * self.lambda1
kldt += kldg + kldi + hypg + hypi
# Total loss is MSE plus regularization losses
loss = mse + kldt * (1.0 / self.total_nobs)
# Log the errors
logs = {'loss': loss, 'rmse': rmse, 'kld0': kld0, 'kld1': kld1,
'kldg': kldg, 'kldi': kldi, 'hypg': hypg, 'hypi': hypi,
'hypglv': F.sum(self.hyper_feat_lv_vec.b),
'hypilv': F.sum(self.hyper_feat_delta_lv.b),
'kldt': kldt, 'bias': F.sum(self.bias_mu.b)}
reporter.report(logs, self)
return loss
def evaluate_on_diff_env(env, n_sample_traj, agent):
    # Samples n_sample_traj trajectories from the GAN model of the environment
R = 0.0
# reset environment
env.reset_state()
agent.reset_state()
# get initial observation
observations = env(tv(np.zeros((n_sample_traj, env.act_size))))[:, :-2]
for i in range(env.spec.timestep_limit):
act = agent(observations)
obs_rew = env(act)
rewards = obs_rew[:, -2]
ends = obs_rew[:, -1]
observations = obs_rew[:, :-2]
R += F.sum(rewards * (1.0 - ends)) / (-len(rewards) * env.spec.timestep_limit)
return R
def calc_log_posterior(theta, x, n=None):
"""Calculate unnormalized log posterior, ``log p(theta | x) + C``
Args:
theta(chainer.Variable): model parameters
x(numpy.ndarray): sample data
n(int): total data size
Returns:
        chainer.Variable: Variable holding the unnormalized log posterior
            ``log p(theta | x) + C``, of shape ``()``
"""
theta1, theta2 = F.split_axis(theta, 2, 0)
log_prior1 = F.sum(F.log(gaussian.gaussian_likelihood(theta1, 0, VAR1)))
log_prior2 = F.sum(F.log(gaussian.gaussian_likelihood(theta2, 0, VAR2)))
prob1 = gaussian.gaussian_likelihood(x, theta1, VAR_X)
prob2 = gaussian.gaussian_likelihood(x, theta1 + theta2, VAR_X)
log_likelihood = F.sum(F.log(prob1 / 2 + prob2 / 2))
if n is not None:
log_likelihood *= n / len(x)
return log_prior1 + log_prior2 + log_likelihood
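# An assumed shape for the gaussian_likelihood helper used above: the
# pointwise normal density N(x; mu, var), written with chainer functions so
# gradients flow back to theta. The actual helper may differ.
import numpy as np
import chainer.functions as F

def gaussian_likelihood(x, mu, var):
    return F.exp(-(x - mu) ** 2 / (2 * var)) / np.sqrt(2 * np.pi * var)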
def calc_loss(self, state, state_dash, actions, rewards, done_list):
assert(state.shape == state_dash.shape)
    s = state.reshape((state.shape[0], -1)).astype(np.float32)
    s_dash = state_dash.reshape((state_dash.shape[0], -1)).astype(np.float32)
q = self.model.q_function(s)
q_dash = self.model_target.q_function(s_dash) # Q(s',*)
max_q_dash = np.asarray(list(map(np.max, q_dash.data)), dtype=np.float32) # max_a Q(s',a)
target = q.data.copy()
for i in range(self.replay_batch_size):
assert(self.replay_batch_size == len(done_list))
r = np.sign(rewards[i]) if self.clipping else rewards[i]
if done_list[i]:
discounted_sum = r
else:
discounted_sum = r + self.gamma * max_q_dash[i]
assert(self.replay_batch_size == len(actions))
target[i, actions[i]] = discounted_sum
loss = F.sum(F.huber_loss(Variable(target), q, delta=1.0)) #/ self.replay_batch_size
return loss, q
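# Worked example of the target construction: only the taken action's entry is
# replaced by the TD target r + gamma * max_a Q(s', a), so the Huber loss is
# zero for all other actions.
import numpy as np

q = np.array([[0.1, 0.2, 0.3]], dtype=np.float32)
target = q.copy()
r, gamma, max_q_dash, action = 1.0, 0.99, 0.5, 2
target[0, action] = r + gamma * max_q_dash
print(target)  # [[0.1    0.2    1.495]]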
def __call__(self, x):
minibatch_size = x.shape[0]
activation = F.reshape(self.t(x), (-1, self.n_kernels, self.kernel_dim))
activation_ex = F.expand_dims(activation, 3)
activation_ex_t = F.expand_dims(F.transpose(activation, (1, 2, 0)), 0)
activation_ex, activation_ex_t = F.broadcast(activation_ex, activation_ex_t)
diff = activation_ex - activation_ex_t
xp = chainer.cuda.get_array_module(x.data)
eps = F.expand_dims(xp.eye(minibatch_size, dtype=xp.float32), 1)
eps = F.broadcast_to(eps, (minibatch_size, self.n_kernels, minibatch_size))
sum_diff = F.sum(abs(diff), axis=2)
sum_diff = F.broadcast_to(sum_diff, eps.shape)
abs_diff = sum_diff + eps
minibatch_features = F.sum(F.exp(-abs_diff), 2)
return F.concat((x, minibatch_features), axis=1)
def __call__(self, x):
xp = chainer.cuda.get_array_module(x.data)
batchsize = x.shape[0]
    if not self.train_weights and self.initial_T is not None:
self.T.W.data = self.initial_T
M = F.reshape(self.T(x), (-1, self.num_kernels, self.ndim_kernel))
M = F.expand_dims(M, 3)
M_T = F.transpose(M, (3, 1, 2, 0))
M, M_T = F.broadcast(M, M_T)
norm = F.sum(abs(M - M_T), axis=2)
eraser = F.broadcast_to(xp.eye(batchsize, dtype=x.dtype).reshape((batchsize, 1, batchsize)), norm.shape)
c_b = F.exp(-(norm + 1e6 * eraser))
o_b = F.sum(c_b, axis=2)
    if not self.train_weights:
self.initial_T = self.T.W.data
return F.concat((x, o_b), axis=1)
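# Both snippets above implement minibatch discrimination (Salimans et al.,
# 2016): per-sample kernel features built from L1 distances across the batch.
# A minimal numpy sketch (self-distances included; the Chainer versions
# suppress or offset them with an identity "eraser"):
import numpy as np

B, K, D = 4, 5, 3                             # batch, kernels, kernel dim
M = np.random.randn(B, K, D)
l1 = np.abs(M[:, None] - M[None, :]).sum(-1)  # (B, B, K) pairwise L1
feats = np.exp(-l1).sum(1)                    # (B, K) minibatch features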
def ordinal_loss(y, mask):
xp = cuda.get_array_module(y.data)
volatile = y.volatile
b, c, n = y.data.shape
max_y = F.broadcast_to(F.max(y, axis=1, keepdims=True), y.data.shape)
y = y - max_y
sum_y = F.broadcast_to(F.expand_dims(F.sum(y, axis=1), 1), y.data.shape)
down_tri = np.tri(c, dtype=np.float32)
up_tri = down_tri.T
w1 = Variable(xp.asarray(down_tri.reshape(c, c, 1, 1)), volatile=volatile)
w2 = Variable(xp.asarray(up_tri.reshape(c, c, 1, 1)), volatile=volatile)
h = F.exp(F.expand_dims(y, -1))
h1 = F.convolution_2d(h, w1)
h1 = F.convolution_2d(F.log(h1), w1)
h2 = F.convolution_2d(h, w2)
h2 = F.convolution_2d(F.log(h2), w2)
h = F.reshape(h1 + h2, (b, c, n))
return F.sum((h - sum_y - y) * mask) / b
def __forward(self, batch_x, batch_t, weight, train=True):
xp = self.xp
x = Variable(xp.asarray(batch_x), volatile=not train)
t = Variable(xp.asarray(batch_t), volatile=not train)
y = self.net(x, train=train)
b, c, n = y.data.shape
    mask = Variable(xp.asarray(
        np.broadcast_to(weight.reshape(-1, 1, 1), (b, c, n)) *
        loss_mask(batch_t, self.net.rating_num)), volatile=not train)
if self.ordinal_weight == 0:
loss = F.sum(-F.log_softmax(y) * mask) / b
elif self.ordinal_weight == 1:
loss = ordinal_loss(y, mask)
else:
loss = (1 - self.ordinal_weight) * F.sum(-F.log_softmax(y) * mask) / b + self.ordinal_weight * ordinal_loss(y, mask)
acc = self.__accuracy(y, t)
return loss, acc