def test_disabled_traced_function(self):
    """Check a function wrapped by ``torch.jit.compile(enabled=False)``.

    The wrapped function is called twice; the first result must equal the
    same expression evaluated directly, and both results must agree.
    """
    x, y = (Variable(torch.Tensor([v]), requires_grad=True) for v in (0.4, 0.7))

    @torch.jit.compile(enabled=False)
    def doit(x, y):
        return torch.sigmoid(torch.tanh(x * (x + y)))

    first = doit(x, y)
    second = doit(x, y)
    reference = torch.sigmoid(torch.tanh(x * (x + y)))
    self.assertEqual(first, reference)
    self.assertEqual(first, second)
python类tanh()的实例源码
def test_python_ir(self):
    """Copy a traced graph into a fresh ``torch._C.Graph`` via the Python IR bindings.

    Nodes whose kind is ``"PythonOp"`` are re-created by name with two extra
    attributes; every other node is cloned. A ``TensorTest`` node carrying a
    tensor attribute is appended last, and ``str()`` of the new graph is
    handed to ``self.assertExpected``.
    """
    x, y = (Variable(torch.Tensor([v]), requires_grad=True) for v in (0.4, 0.7))

    def doit(x, y):
        return torch.sigmoid(torch.tanh(x * (x + y)))

    traced, _ = torch.jit.trace(doit, (x, y))
    src = torch._C._jit_get_graph(traced)
    dst = torch._C.Graph()

    # maps each value/node of the source graph to its counterpart in ``dst``
    remap = {}
    for src_input in src.inputs():
        remap[src_input] = dst.addInput()

    for node in src.nodes():
        if node.kind() == "PythonOp":
            copied = (
                dst.create(node.pyname(), [remap[i] for i in node.inputs()])
                .setType(node.typeOption())
                .s_("note", "from_pyop")
                .i_("some_value", len(node.scalar_args()))
            )
            # the integer attribute must read back unchanged
            assert copied.i("some_value") == len(node.scalar_args())
        else:
            copied = dst.createClone(node, lambda v: remap[v])
            assert copied.kindOf("Offset") == "i"
        remap[node] = dst.appendNode(copied)

    for src_output in src.outputs():
        dst.registerOutput(remap[src_output])

    # exercise the tensor-valued attribute accessors on a standalone node
    t_node = dst.create("TensorTest").t_("a", torch.ones([2, 2]))
    assert t_node.attributeNames() == ["a"]
    dst.appendNode(t_node)
    assert torch.equal(torch.ones([2, 2]), t_node.t("a"))
    self.assertExpected(str(dst))
def update_buffer(self, S_tm1, c_t, o_tm1, ident):
    """Compute the next buffer ``S`` from the previous one.

    Steps, in order:
      1. ``ident`` goes through ``self.F_u`` and tanh.
      2. ``c_t`` plus that term is concatenated (dim 1) with ``o_tm1``
         (leading axis squeezed, divided by 30) and placed in front of
         ``S_tm1`` with its last slot along dim 2 dropped.
      3. The flattened result goes through ``self.N_u``; the tanh term is
         added onto the leading columns of that output.
      4. The updated column is placed in front of ``S_tm1[:, :, :-1]``.

    Returns the tensor from step 4. ``S_tm1`` itself is not modified.
    """
    # concat previous output & context
    idt_term = torch.tanh(self.F_u(ident))
    prev_out = o_tm1.squeeze(0)
    head = torch.cat([c_t + idt_term, prev_out / 30], 1).unsqueeze(2)
    candidate = torch.cat([head, S_tm1[:, :, :-1]], 2)

    # update S
    flat = candidate.view(candidate.size(0), -1)
    new_col = self.N_u(flat)
    width = idt_term.size(1)
    new_col[:, :width] = new_col[:, :width] + idt_term
    new_col = new_col.unsqueeze(2)
    return torch.cat([new_col, S_tm1[:, :, :-1]], 2)
def getCoef(outputs):
    '''
    Split the last axis of ``outputs`` into five distribution parameters.

    Channels 0 and 1 are returned as-is (the means), channels 2 and 3 are
    exponentiated (the standard deviations) and channel 4 is passed
    through tanh (the correlation).

    params:
    outputs : Output of the SRNN model, read as outputs[:, :, k] for k = 0..4

    returns:
    tuple (mux, muy, sx, sy, corr)
    '''
    mux, muy, log_sx, log_sy, raw_corr = (outputs[:, :, k] for k in range(5))
    return mux, muy, torch.exp(log_sx), torch.exp(log_sy), torch.tanh(raw_corr)
def _get_rnn_output(self, input_word, input_char, mask=None, length=None, hx=None):
    """Embed words and characters, run the RNN and the optional dense layer.

    When ``length`` is missing but ``mask`` is given, ``length`` is derived
    by summing the mask over dim 1. With a known ``length`` the input goes
    through ``utils.prepare_rnn_seq`` / ``utils.recover_rnn_seq`` around the
    RNN; otherwise the RNN is called on the features directly.

    Returns ``(output, hn, mask, length)``.
    """
    # Lengths are recovered from the mask, never the reverse, so callers
    # that need masking must always pass ``mask`` themselves.
    if mask is not None and length is None:
        length = mask.data.sum(dim=1).long()

    word_emb = self.word_embedd(input_word)  # [batch, length, word_dim]
    char_emb = self.char_embedd(input_char)  # [batch, length, char_length, char_dim]
    dims = char_emb.size()

    # [batch * length, char_length, char_dim] -> [batch * length, char_dim, char_length]
    char_seq = char_emb.view(dims[0] * dims[1], dims[2], dims[3]).transpose(1, 2)
    # CNN gives [batch * length, char_filters, char_length]; max over dim 2 pools it
    pooled, _ = self.conv1d(char_seq).max(dim=2)
    # back to [batch, length, char_filters]
    char_feat = torch.tanh(pooled).view(dims[0], dims[1], -1)

    # [batch, length, word_dim + char_filters], then input dropout
    features = self.dropout_in(torch.cat([word_emb, char_feat], dim=2))

    if length is None:
        # output from rnn [batch, length, hidden_size]
        output, hn = self.rnn(features, hx=hx)
    else:
        # prepare packed_sequence
        seq_input, hx, rev_order, mask = utils.prepare_rnn_seq(features, length, hx=hx, masks=mask, batch_first=True)
        seq_output, hn = self.rnn(seq_input, hx=hx)
        output, hn = utils.recover_rnn_seq(seq_output, rev_order, hx=hn, batch_first=True)

    output = self.dropout_rnn(output)
    if self.dense is not None:
        # [batch, length, tag_space]
        output = F.elu(self.dense(output))
    return output, hn, mask, length
def _get_rnn_output(self, input_word, input_char, mask=None, length=None, hx=None):
    """Embed words and characters, run the masked RNN and the optional dense layer.

    ``mask`` is forwarded to ``self.rnn`` as its second positional argument;
    ``mask`` and ``length`` are returned unchanged.

    Returns ``(output, hn, mask, length)``.
    """
    word_emb = self.word_embedd(input_word)  # [batch, length, word_dim]
    char_emb = self.char_embedd(input_char)  # [batch, length, char_length, char_dim]
    dims = char_emb.size()

    # [batch * length, char_length, char_dim] -> [batch * length, char_dim, char_length]
    char_seq = char_emb.view(dims[0] * dims[1], dims[2], dims[3]).transpose(1, 2)
    # CNN gives [batch * length, char_filters, char_length]; max over dim 2 pools it
    pooled, _ = self.conv1d(char_seq).max(dim=2)
    # back to [batch, length, char_filters]
    char_feat = torch.tanh(pooled).view(dims[0], dims[1], -1)

    # [batch, length, word_dim + char_filters]
    features = torch.cat([word_emb, char_feat], dim=2)
    # output from rnn [batch, length, hidden_size]
    output, hn = self.rnn(features, mask, hx=hx)

    # dropout is applied on the tensor with dims 1 and 2 swapped, then swapped back
    output = self.dropout_rnn(output.transpose(1, 2)).transpose(1, 2)

    if self.dense is not None:
        # [batch, length, tag_space]
        output = F.elu(self.dense(output))
    return output, hn, mask, length
def _get_encoder_output(self, input_word, input_char, input_pos, mask_e=None, length_e=None, hx=None):
    """Build the encoder input from word, character and POS embeddings and encode it.

    Returns ``(src_encoding, output, hn, mask_e, length_e)`` where
    ``src_encoding`` is the concatenated (post-dropout) embedding tensor and
    ``output`` is the encoder output after ``self.dropout_out``. ``mask_e``
    and ``length_e`` are returned unchanged.
    """
    word_emb = self.word_embedd(input_word)  # [batch, length, word_dim]
    pos_emb = self.pos_embedd(input_pos)     # [batch, length, pos_dim]
    char_emb = self.char_embedd(input_char)  # [batch, length, char_length, char_dim]
    dims = char_emb.size()

    # [batch * length, char_length, char_dim] -> [batch * length, char_dim, char_length]
    char_seq = char_emb.view(dims[0] * dims[1], dims[2], dims[3]).transpose(1, 2)
    # CNN gives [batch * length, char_filters, char_length]; max over dim 2 pools it
    pooled, _ = self.conv1d(char_seq).max(dim=2)
    # back to [batch, length, char_filters]
    char_feat = torch.tanh(pooled).view(dims[0], dims[1], -1)

    # input dropout, applied to word, pos and char features in that order
    word_emb = self.dropout_in(word_emb)
    pos_emb = self.dropout_in(pos_emb)
    char_feat = self.dropout_in(char_feat)

    # concatenate word, char and pos features along the last axis
    src_encoding = torch.cat([word_emb, char_feat, pos_emb], dim=2)

    # output from rnn [batch, length, hidden_size]
    output, hn = self.encoder(src_encoding, mask_e, hx=hx)

    # [batch, length, hidden_size] --> [batch, hidden_size, length] --> [batch, length, hidden_size]
    output = self.dropout_out(output.transpose(1, 2)).transpose(1, 2)
    return src_encoding, output, hn, mask_e, length_e
def _step(self, H_t, T_t, h0, h_mask, t_mask):
    """Run one step through the stacked ``rnn_h`` / ``rnn_t`` layer pairs.

    Starting from ``h0``, each layer computes a tanh candidate ``H`` and a
    sigmoid gate ``T`` from the masked previous state and blends them as
    ``(H - s) * T + s``. Only the first layer adds the incoming ``H_t`` and
    ``T_t`` terms to its pre-activations.

    Returns the state produced by the last layer pair.
    """
    state = h0
    for depth, (h_layer, t_layer) in enumerate(zip(self.rnn_h, self.rnn_t)):
        # separate masks for the candidate path and the gate path
        h_rec = h_layer(h_mask.expand_as(state) * state)
        t_rec = t_layer(t_mask.expand_as(state) * state)
        if depth == 0:
            # inject the step inputs at the bottom layer only
            h_rec = H_t + h_rec
            t_rec = T_t + t_rec
        H_t = F.tanh(h_rec)
        T_t = F.sigmoid(t_rec)
        s_l = (H_t - state) * T_t + state
        state = s_l
    return s_l
def forward(self, input):
    """Return the element-wise hyperbolic tangent of ``input``."""
    activated = torch.tanh(input)
    return activated
def LSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None):
    """Compute one LSTM cell update.

    ``hidden`` is the pair ``(hx, cx)``. The summed linear projections of
    ``input`` and ``hx`` are split along dim 1 into four equal chunks, used
    in order as input gate, forget gate, cell candidate and output gate.

    Returns ``(hy, cy)``, the new hidden and cell states.
    """
    h_prev, c_prev = hidden
    projected = F.linear(input, w_ih, b_ih) + F.linear(h_prev, w_hh, b_hh)
    pre_in, pre_forget, pre_cell, pre_out = projected.chunk(4, 1)

    c_next = (F.sigmoid(pre_forget) * c_prev) + (F.sigmoid(pre_in) * F.tanh(pre_cell))
    h_next = F.sigmoid(pre_out) * F.tanh(c_next)
    return h_next, c_next
def test_simple(self):
    """Trace ``sigmoid(tanh(x * (x + y)))`` with ``nderivs=0`` and pass the trace to ``assertExpectedTrace``."""
    x, y = (Variable(torch.Tensor([v]), requires_grad=True) for v in (0.4, 0.7))

    def f(x, y):
        return torch.sigmoid(torch.tanh(x * (x + y)))

    # the traced function's return value is not inspected here
    trace, _ = torch.jit.trace(f, (x, y), nderivs=0)
    self.assertExpectedTrace(trace)
# matmul is currently implemented as a native function, which
# exercises different codepaths in the JIT. The following two
# tests ensure that (1) matmul indeed traces into an atomic,
# native operation, and (2) the JIT knows how to run it
def test_scopes(self):
    """Trace a function that runs ops inside ``torch.jit.scope`` blocks named 'Foo' and 'Bar'.

    The resulting trace is passed to ``assertExpectedTrace``.
    """
    x, y = (Variable(torch.Tensor([v]), requires_grad=True) for v in (0.4, 0.7))

    # NOTE(review): 'Bar' is taken to be nested inside 'Foo', with the
    # sigmoid back in 'Foo' -- confirm against the expected trace file.
    def f(x, y):
        out = x + y
        with torch.jit.scope('Foo', out):
            out = x * out
            with torch.jit.scope('Bar', out):
                out = torch.tanh(out)
            out = torch.sigmoid(out)
        return out

    trace, _ = torch.jit.trace(f, (x, y), nderivs=0)
    self.assertExpectedTrace(trace)
def test_cse(self):
    """Record a trace with repeated subexpressions, lint it, run the CSE pass and check the trace.

    The tracer is driven manually through ``_tracer_enter`` / ``_tracer_exit``.
    """
    x, y = (
        Variable(torch.Tensor(vals), requires_grad=True)
        for vals in ([0.4, 0.3], [0.7, 0.5])
    )

    trace = torch._C._tracer_enter((x, y), 0)
    # The duplicated (x + y) and tanh(...) terms are written out on purpose;
    # presumably they are what the CSE pass is expected to merge.
    cubed = (x + y) * (x + y) * (x + y)
    tanh_sum = torch.tanh(cubed) + torch.tanh(cubed)
    result = (x + y) * (x + y) * (x + y) + tanh_sum
    torch._C._tracer_exit((result,))

    torch._C._jit_pass_lint(trace)
    torch._C._jit_pass_cse(trace)
    self.assertExpectedTrace(trace)
def test_compile_addc(self):
    """Check a ``torch.jit.compile(nderivs=0)`` function that adds a constant, on CUDA inputs.

    The second call runs inside ``self.assertCompiled(doit)``; the first
    result must equal the directly evaluated expression and the second call.
    """
    x, y = (
        Variable(torch.Tensor([v]), requires_grad=True).float().cuda()
        for v in (0.4, 0.7)
    )

    @torch.jit.compile(nderivs=0)
    def doit(x, y):
        return torch.sigmoid(torch.tanh(x * (x + y) + 1))

    first = doit(x, y)
    with self.assertCompiled(doit):
        second = doit(x, y)

    reference = torch.sigmoid(torch.tanh(x * (x + y) + 1))
    self.assertEqual(first, reference)
    self.assertEqual(first, second)
def test_traced_function(self):
    """Check a function decorated with ``torch.jit.compile(nderivs=0)``.

    The second call runs inside ``self.assertCompiled(doit)``; the first
    result must equal the directly evaluated expression and the second call.
    """
    x, y = (Variable(torch.Tensor([v]), requires_grad=True) for v in (0.4, 0.7))

    @torch.jit.compile(nderivs=0)
    def doit(x, y):
        return torch.sigmoid(torch.tanh(x * (x + y)))

    first = doit(x, y)
    with self.assertCompiled(doit):
        second = doit(x, y)

    reference = torch.sigmoid(torch.tanh(x * (x + y)))
    self.assertEqual(first, reference)
    self.assertEqual(first, second)
def test_disabled_traced_function(self):
    """Check a function wrapped by ``torch.jit.compile(enabled=False)``.

    The wrapped function is called twice; the first result must equal the
    same expression evaluated directly, and both results must agree.
    """
    x, y = (Variable(torch.Tensor([v]), requires_grad=True) for v in (0.4, 0.7))

    @torch.jit.compile(enabled=False)
    def doit(x, y):
        return torch.sigmoid(torch.tanh(x * (x + y)))

    first = doit(x, y)
    second = doit(x, y)
    reference = torch.sigmoid(torch.tanh(x * (x + y)))
    self.assertEqual(first, reference)
    self.assertEqual(first, second)
def test_python_ir(self):
    """Clone a traced graph into a fresh ``torch._C.Graph`` via the Python IR bindings.

    Every node is cloned and appended, and each original output is mapped
    to the matching output of the clone. A ``TensorTest`` node carrying a
    tensor attribute is appended last, and ``str()`` of the new graph is
    handed to ``self.assertExpected``.
    """
    x, y = (Variable(torch.Tensor([v]), requires_grad=True) for v in (0.4, 0.7))

    def doit(x, y):
        return torch.sigmoid(torch.tanh(x * (x + y)))

    traced, _ = torch.jit.trace(doit, (x, y))
    src = torch._C._jit_get_graph(traced)
    dst = torch._C.Graph()

    # maps each value of the source graph to its counterpart in ``dst``
    remap = {}
    for src_input in src.inputs():
        remap[src_input] = dst.addInput()

    for node in src.nodes():
        cloned = dst.createClone(node, lambda v: remap[v])
        dst.appendNode(cloned)
        # register outputs pairwise so later nodes can resolve their inputs
        for old_out, new_out in zip(node.outputs(), cloned.outputs()):
            remap[old_out] = new_out

    for src_output in src.outputs():
        dst.registerOutput(remap[src_output])

    # exercise the tensor-valued attribute accessors on a standalone node
    t_node = dst.create("TensorTest").t_("a", torch.ones([2, 2]))
    assert t_node.attributeNames() == ["a"]
    dst.appendNode(t_node)
    assert torch.equal(torch.ones([2, 2]), t_node.t("a"))
    self.assertExpected(str(dst))
def tanh_quantize(input, bits):
    """Quantize ``input`` onto ``2**bits`` levels that are evenly spaced in tanh space.

    With ``bits == 1`` the result is simply ``torch.sign(input)``. Otherwise
    the input is squashed with tanh, rescaled to [0, 1], rounded to the
    nearest of ``2**bits - 1`` steps, mapped back to [-1, 1] and passed
    through arctanh.

    Note: the outermost levels are exactly -1 and 1, whose arctanh is
    -inf / +inf, so inputs that round to those levels come back infinite.

    Raises AssertionError when ``bits < 1``.
    """
    assert bits >= 1, bits
    if bits == 1:
        return torch.sign(input)

    steps = math.pow(2.0, bits) - 1
    unit = (torch.tanh(input) + 1.0) / 2            # [0, 1]
    snapped = torch.floor(unit * steps + 0.5) / steps
    level = 2 * snapped - 1                         # [-1, 1]
    return 0.5 * torch.log((1 + level) / (1 - level))  # arctanh
def duplicate_model_with_quant(model, bits, overflow_rate=0.0, counter=10, type='linear'):
    """Insert a quantization layer after selected layers of every nn.Sequential.

    Assumes that the original model has at least one nn.Sequential.

    For an ``nn.Sequential`` a new ``nn.Sequential`` is returned: each
    Conv2d / Linear / BatchNorm1d / BatchNorm2d / AvgPool2d child is kept
    and followed by a quant layer stored under ``'<name>_<type>_quant'``;
    any other child is processed recursively. For any other module the
    children are replaced in place by their processed versions and the
    same module object is returned.

    ``type`` selects the quantizer: 'linear' builds a ``LinearQuant`` (the
    only one using ``overflow_rate`` and ``counter``); 'log', 'minmax' and
    'tanh' build a ``NormalQuant`` around the matching quantize function.

    Raises AssertionError for an unknown ``type``.
    """
    assert type in ['linear', 'minmax', 'log', 'tanh']

    if not isinstance(model, nn.Sequential):
        # generic container: rewrite the children in place
        for name, child in model._modules.items():
            model._modules[name] = duplicate_model_with_quant(child, bits, overflow_rate, counter, type)
        return model

    quantizable = (nn.Conv2d, nn.Linear, nn.BatchNorm1d, nn.BatchNorm2d, nn.AvgPool2d)
    rebuilt = OrderedDict()
    for name, child in model._modules.items():
        if not isinstance(child, quantizable):
            rebuilt[name] = duplicate_model_with_quant(child, bits, overflow_rate, counter, type)
            continue

        rebuilt[name] = child
        quant_name = '{}_quant'.format(name)
        if type == 'linear':
            quant_layer = LinearQuant(quant_name, bits=bits, overflow_rate=overflow_rate, counter=counter)
        else:
            # quantize functions are looked up only in the branch that needs them
            if type == 'log':
                quant_func = log_minmax_quantize
            elif type == 'minmax':
                quant_func = min_max_quantize
            else:
                quant_func = tanh_quantize
            quant_layer = NormalQuant(quant_name, bits=bits, quant_func=quant_func)
        rebuilt['{}_{}_quant'.format(name, type)] = quant_layer

    return nn.Sequential(rebuilt)
def forward(self, x, hidden):
    """Run one LSTM step with an optional dropout variant.

    ``hidden`` is the pair ``(h, c)``. ``x``, ``h`` and ``c`` are each
    reshaped to 2-D with their dimension 1 as the row count. The
    pre-activation ``i2h(x) + h2h(h)`` holds, column-wise, three sigmoid
    gates (input, forget, output) followed by the tanh cell candidate, each
    ``hidden_size`` wide.

    Dropout is active only when ``self.training`` is set and
    ``self.dropout > 0``; ``self.dropout_method`` then picks where it acts:
    'semeniuta' on the cell candidate, 'moon' on the new cell state,
    'pytorch' and 'gal' on the new hidden state ('moon' and 'gal' multiply
    by ``self.mask`` and rescale by ``1 / (1 - dropout)``).

    Returns ``(h_t, (h_t, c_t))`` with both states reshaped to 3-D with a
    leading axis of size 1.
    """
    use_dropout = self.training and self.dropout > 0.0
    # only consult the method when dropout is actually active
    method = self.dropout_method if use_dropout else None

    h_prev, c_prev = hidden
    h_prev = h_prev.view(h_prev.size(1), -1)
    c_prev = c_prev.view(c_prev.size(1), -1)
    x = x.view(x.size(1), -1)

    # Linear mappings
    preact = self.i2h(x) + self.h2h(h_prev)

    # activations: first 3 * hidden_size columns are gates, the rest the candidate
    n = self.hidden_size
    gates = preact[:, :3 * n].sigmoid()
    g_t = preact[:, 3 * n:].tanh()
    i_t = gates[:, :n]
    f_t = gates[:, n:2 * n]
    o_t = gates[:, -n:]

    # cell computations
    if method == 'semeniuta':
        g_t = F.dropout(g_t, p=self.dropout, training=self.training)

    c_t = c_prev * f_t + i_t * g_t

    if method == 'moon':
        # overwrite the underlying data directly
        c_t.data.set_(th.mul(c_t, self.mask).data)
        c_t.data *= 1.0/(1.0 - self.dropout)

    h_t = o_t * c_t.tanh()

    if method == 'pytorch':
        # in-place, so the return value is not needed
        F.dropout(h_t, p=self.dropout, training=self.training, inplace=True)
    if method == 'gal':
        h_t.data.set_(th.mul(h_t, self.mask).data)
        h_t.data *= 1.0/(1.0 - self.dropout)

    # Reshape for compatibility
    h_t = h_t.view(1, h_t.size(0), -1)
    c_t = c_t.view(1, c_t.size(0), -1)
    return h_t, (h_t, c_t)