def ae(x):
    """Push `x` through eight dense layers and return the last layer's linear output.

    Layers '1' through '7' apply the activation selected by the enclosing-scope
    `nonlinearity_name`; layer '8' is left linear. Weights and biases are read
    from the enclosing-scope dicts `W` and `b`, keyed by the strings '1'..'8'.

    Raises:
        NameError: if `nonlinearity_name` is not 'relu', 'elu', 'gelu' or 'silu'.
    """
    def pick_activation():
        # Resolve the activation lazily so only the requested one is touched.
        if nonlinearity_name == 'relu':
            return tf.nn.relu
        if nonlinearity_name == 'elu':
            return tf.nn.elu
        if nonlinearity_name == 'gelu':
            # tanh-based approximation; the erf form it stands in for is
            # x * erfc(-x / sqrt(2)) / 2.
            def gelu_fast(_x):
                cubic = _x + 0.044715 * tf.pow(_x, 3)
                return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * cubic))
            return gelu_fast
        if nonlinearity_name == 'silu':
            def silu(_x):
                return _x * tf.sigmoid(_x)
            return silu
        raise NameError("Need 'relu', 'elu', 'gelu', or 'silu' for nonlinearity_name")

    activation = pick_activation()

    hidden = x
    for layer in ('1', '2', '3', '4', '5', '6', '7'):
        hidden = activation(tf.matmul(hidden, W[layer]) + b[layer])

    # Final layer: affine map only, no activation.
    return tf.matmul(hidden, W['8']) + b['8']
python类erf()的实例源码
def prob_is_largest(self, Y, mu, var, gh_x, gh_w):
    """Gauss-Hermite estimate that the latent function indexed by `Y` is the largest.

    Args:
        Y: class indices; cast to int64 and flattened before use.
        mu: latent means -- presumably (N, num_classes); TODO confirm with caller.
        var: latent variances, same layout as `mu`; clipped below at 1e-10.
        gh_x: Gauss-Hermite abscissae.
        gh_w: Gauss-Hermite weights (divided by sqrt(pi) here).

    Returns:
        Result of the final matmul: one column, with the quadrature grid
        summed out.
    """
    labels = tf.reshape(tf.cast(Y, tf.int64), (-1,))

    # Mask that is 1 at the labelled class and 0 elsewhere; use it to pull out
    # the mean and variance of the indicated latent function.
    selected = tf.cast(tf.one_hot(labels, self.num_classes, 1., 0.), settings.float_type)
    mu_at_label = tf.reduce_sum(selected * mu, 1)
    var_at_label = tf.reduce_sum(selected * var, 1)

    # Gauss-Hermite grid centred on the selected latent function.
    grid_scale = tf.sqrt(tf.clip_by_value(2. * var_at_label, 1e-10, np.inf))
    grid = tf.reshape(mu_at_label, (-1, 1)) + gh_x * tf.reshape(grid_scale, (-1, 1))

    # Gaussian CDF of every latent function (the selected one included)
    # evaluated at each grid point.
    latent_std = tf.sqrt(tf.clip_by_value(var, 1e-10, np.inf))
    standardized = (tf.expand_dims(grid, 1) - tf.expand_dims(mu, 2)) / tf.expand_dims(latent_std, 2)
    cdfs = 0.5 * (1.0 + tf.erf(standardized / np.sqrt(2.0)))
    # Squash into [1e-4, 1 - 1e-4] so the product below never hits exactly 0 or 1.
    cdfs = cdfs * (1 - 2e-4) + 1e-4

    # Replace the selected function's own CDF with 1 so it drops out of the product.
    not_selected = tf.cast(tf.one_hot(labels, self.num_classes, 0., 1.), settings.float_type)
    cdfs = cdfs * tf.expand_dims(not_selected, 2) + tf.expand_dims(selected, 2)

    # Product over latent functions, then weighted sum over the quadrature grid.
    joint = tf.reduce_prod(cdfs, reduction_indices=[1])
    weights = tf.reshape(gh_w / np.sqrt(np.pi), (-1, 1))
    return tf.matmul(joint, weights)
def _normal_distribution_cdf(x, stddev):
    """Return the CDF at `x` of a zero-mean normal with standard deviation `stddev`.

    The input and output `Tensor`s have matching shapes.

    Args:
        x: a `Tensor`
        stddev: a `Tensor` with the same shape as `x`.

    Returns:
        a `Tensor` with the same shape as `x`.
    """
    # The small additive constant guards against dividing by an exactly-zero stddev.
    scaled_std = math.sqrt(2) * stddev + 1e-20
    return 0.5 * (1.0 + tf.erf(x / scaled_std))
def normal_ccdf(x, mu, sigma2):
    """Normal CCDF: P(X > x) for X ~ N(mu, sigma2).

    Computed as 0.5 * erfc(z) with z = (x - mu) / sqrt(2 * sigma2). This is
    algebraically the same as 1 - 0.5 * (1 + erf(z)) but does not subtract two
    nearly equal numbers, so it stays accurate in the upper tail instead of
    rounding to exactly 0.

    Args:
        x: point(s) at which to evaluate the CCDF.
        mu: mean of the distribution.
        sigma2: variance of the distribution.

    Returns:
        Tensor of tail probabilities.

    Note:
        A degenerate distribution (sigma2 == 0) is not special-cased: the
        division below is then by zero. For x != mu the infinite argument
        still maps to 0 or 1, but x == mu gives 0/0.
    """
    z = (x - mu) / tf.sqrt(2. * sigma2)
    return 0.5 * tf.erfc(z)
def rect_gaussian_kld(mean, log_var, mean0=0., log_var0=0., reduce_mean=True):
    """KL divergence between two rectified Gaussians, in closed form.

    The first distribution is parameterised by (`mean`, `log_var`), the second
    by (`mean0`, `log_var0`). Each is treated as a Gaussian whose mass below
    zero is collapsed onto a point mass at zero; the divergence is the sum of
    the point-mass term and the integral over the positive half-line.

    Args:
        mean: mean of the first (pre-rectification) Gaussian.
        log_var: log-variance of the first Gaussian.
        mean0: mean of the second Gaussian (default 0).
        log_var0: log-variance of the second Gaussian (default 0).
        reduce_mean: if True, average the per-row result into a scalar.

    Returns:
        The divergence summed over axis 1, and additionally averaged over the
        remaining entries when `reduce_mean` is True.
    """
    def phi(x):
        # Standard normal density.
        return tf.exp(-0.5*tf.square(x))/np.sqrt(2*np.pi)

    def Phi(x):
        # Standard normal CDF.
        return 0.5 + 0.5*tf.erf(x/np.sqrt(2))

    smean = tf.square(mean)
    var = tf.exp(log_var)
    log_std = 0.5*log_var
    std = tf.exp(log_std)

    smean0 = tf.square(mean0)
    var0 = tf.exp(log_var0)
    log_std0 = 0.5*log_var0
    std0 = tf.exp(log_std0)

    # Keeps the logarithms finite when a point-mass probability underflows to 0.
    tol = 1.0e-10

    # Probability mass the first distribution places on the point at zero.
    pzero = Phi(-mean/std)

    # Point-mass contribution: pzero * log(pzero / pzero0).
    kld = pzero*(tf.log(pzero+tol) - tf.log(Phi(-mean0/std0)+tol))
    # Constant part of the log-density ratio, weighted by the positive mass.
    kld += (1-pzero)*(log_std0 - log_std + 0.5*(smean0/var0 - smean/var))
    # Quadratic part: coefficient times the truncated second moment.
    kld += (0.5/var0 - 0.5/var)*((smean + var)*(1-pzero) + mean*std*phi(-mean/std))
    # Linear part: coefficient times the truncated first moment.
    kld -= (mean0/var0 - mean/var)*(mean*(1-pzero) + std*phi(-mean/std))

    kld = tf.reduce_sum(kld, 1)
    if reduce_mean:
        kld = tf.reduce_mean(kld)
    return kld
def probit(x):
    """Standard normal CDF of `x`, squashed into [1e-3, 1 - 1e-3]."""
    gaussian_cdf = 0.5 * (1.0 + tf.erf(x / np.sqrt(2.0)))
    # Shrink and shift so the result never reaches exactly 0 or 1.
    return gaussian_cdf * (1 - 2e-3) + 1e-3
def setUp(self):
    """Build the table of unary ops under test and a float test tensor.

    Each `self.ops` entry is a 4-tuple:
    (op name, Python operator or None, TensorFlow op, labeled-tensor op).
    `self.test_lt` is `self.original_lt` cast to float32 and divided by its
    total element count.
    """
    super(CoreUnaryOpsTest, self).setUp()

    # Every op is listed exactly once; repeated rows would only re-run the
    # same checks.
    self.ops = [
        ('abs', operator.abs, tf.abs, core.abs_function),
        ('neg', operator.neg, tf.neg, core.neg),
        # TODO(shoyer): add unary + to core TensorFlow
        ('pos', None, None, None),
        ('sign', None, tf.sign, core.sign),
        ('reciprocal', None, tf.reciprocal, core.reciprocal),
        ('square', None, tf.square, core.square),
        ('round', None, tf.round, core.round_function),
        ('sqrt', None, tf.sqrt, core.sqrt),
        ('rsqrt', None, tf.rsqrt, core.rsqrt),
        ('log', None, tf.log, core.log),
        ('exp', None, tf.exp, core.exp),
        ('ceil', None, tf.ceil, core.ceil),
        ('floor', None, tf.floor, core.floor),
        ('cos', None, tf.cos, core.cos),
        ('sin', None, tf.sin, core.sin),
        ('tan', None, tf.tan, core.tan),
        ('acos', None, tf.acos, core.acos),
        ('asin', None, tf.asin, core.asin),
        ('atan', None, tf.atan, core.atan),
        ('lgamma', None, tf.lgamma, core.lgamma),
        ('digamma', None, tf.digamma, core.digamma),
        ('erf', None, tf.erf, core.erf),
        ('erfc', None, tf.erfc, core.erfc),
    ]

    # Product of all axis sizes, i.e. the number of elements in original_lt.
    total_size = np.prod([v.size for v in self.original_lt.axes.values()])
    self.test_lt = core.LabeledTensor(
        tf.cast(self.original_lt, tf.float32) / total_size,
        self.original_lt.axes)
def test_Erf(self):
    """Apply `tf.erf` to a `self.random(4, 3)` input and hand the result to `self.check`."""
    self.check(tf.erf(self.random(4, 3)))
def feedforward(x):
    """Push `x` through eight dense+dropout layers and return the ninth layer's linear output.

    Layers '1' through '8' apply the activation selected by the enclosing-scope
    `nonlinearity_name`, then dropout with the enclosing-scope `p` whenever the
    `is_training` predicate is true. Layer '9' is left linear. Weights and
    biases are read from the enclosing-scope dicts `W` and `b`.

    Raises:
        NameError: if `nonlinearity_name` is not 'relu', 'elu', 'gelu' or 'silu'.
    """
    def pick_activation():
        # Resolve the activation lazily so only the requested one is touched.
        if nonlinearity_name == 'relu':
            return tf.nn.relu
        if nonlinearity_name == 'elu':
            return tf.nn.elu
        if nonlinearity_name == 'gelu':
            # tanh-based approximation; the erf form it stands in for is
            # x * erfc(-x / sqrt(2)) / 2.
            def gelu_fast(_x):
                cubic = _x + 0.044715 * tf.pow(_x, 3)
                return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * cubic))
            return gelu_fast
        if nonlinearity_name == 'silu':
            def silu(_x):
                return _x * tf.sigmoid(_x)
            return silu
        raise NameError("Need 'relu', 'elu', 'gelu', or 'silu' for nonlinearity_name")

    activation = pick_activation()

    def hidden_layer(inputs, layer):
        # Affine map + activation, then dropout only on the training branch.
        activated = activation(tf.matmul(inputs, W[layer]) + b[layer])
        return tf.cond(is_training,
                       lambda: tf.nn.dropout(activated, p),
                       lambda: activated)

    hidden = x
    for layer in ('1', '2', '3', '4', '5', '6', '7', '8'):
        hidden = hidden_layer(hidden, layer)

    # Final layer: affine map only, no activation and no dropout.
    return tf.matmul(hidden, W['9']) + b['9']