def get_output_for(self, input, deterministic=False, **kwargs):
    """Normalize ``input`` and pass the result through the nonlinearity.

    The output is ``nonlinearity((input - mean) * (gamma / std) + beta)``,
    where ``std`` has ``self.epsilon`` added to it beforehand.

    Parameters
    ----------
    input
        Expression to normalize. It must provide ``mean`` and ``std``
        reductions accepting ``keepdims`` (presumably a Theano tensor,
        given the ``theano``/``T`` calls below -- confirm with callers).
    deterministic : bool
        If true, normalize with the stored ``self.mean`` / ``self.std``.
        Otherwise normalize with statistics of ``input`` taken over
        ``self.axes`` and attach default updates that move the stored
        statistics towards the batch statistics with weight ``self.alpha``.
    **kwargs
        Accepted but not used by this method.

    Returns
    -------
    The value returned by ``self.nonlinearity`` for the normalized input.
    """
    axes = self.axes

    if deterministic:
        # Inference path: rely on the stored statistics.
        mean, std = self.mean, self.std
    else:
        # Training path: statistics of the current batch.
        batch_mean = input.mean(axes, keepdims=True)
        batch_std = input.std(axes, keepdims=True)

        # Clones of the stored statistics (meant to alias the same memory)
        # serve as carriers for the running-average updates.
        tracked_mean = theano.clone(self.mean, share_inputs=False)
        tracked_std = theano.clone(self.std, share_inputs=False)

        # Exponential moving average; built from the raw batch statistics,
        # i.e. before the zero-weighted terms below are added.
        tracked_mean.default_update = (
            (1 - self.alpha) * tracked_mean + self.alpha * batch_mean
        )
        tracked_std.default_update = (
            (1 - self.alpha) * tracked_std + self.alpha * batch_std
        )

        # Reference the clones with zero weight so that they are part of
        # the graph and their default updates get picked up; the terms are
        # expected to be optimized away later.
        mean = batch_mean + 0 * tracked_mean
        std = batch_std + 0 * tracked_std

    # Keep the divisor away from zero in both modes.
    std += self.epsilon

    # Mark the reduced axes as broadcastable on every statistic/parameter.
    mean, std, beta, gamma = (
        T.addbroadcast(term, *axes)
        for term in (mean, std, self.beta, self.gamma)
    )

    centered = input - mean
    scale = gamma / std
    return self.nonlinearity(centered * scale + beta)
# [Page navigation residue] Comment list
# [Page navigation residue] Article table of contents