def __init__(self, input_dim, z_dim, class_num, batch_size):
self.input_dim = input_dim
self.z_dim = z_dim
self.class_num = class_num
self.batch_size = batch_size
self.lr = 0.0001
# -- encoder -------
self.encoder = Encoder([input_dim, 1200, 600, 100], z_dim)
# -- decoder -------
self.decoder = Decoder([z_dim, 100, 600, 1200, input_dim])
# -- discriminator --
self.discriminator = Discriminator([z_dim + (class_num + 1), 50, 20, 10, 1])
# -- sampler ----
self.sampler = Sampler(class_num)
Example source code for the Python Decoder() class
def __init__(self, vsize, esize, hsize, asize, buckets, **kwargs):
super(PointerNet, self).__init__()
self.name = kwargs.get('name', self.__class__.__name__)
self.scope = kwargs.get('scope', self.name)
self.enc_vsize = vsize
self.enc_esize = esize
self.enc_hsize = hsize
self.dec_msize = self.enc_hsize * 2 # concatenation of bidirectional RNN states
self.dec_isize = self.enc_hsize * 2 # concatenation of bidirectional RNN states
self.dec_hsize = hsize
self.dec_asize = asize
self.buckets = buckets
self.max_len = self.buckets[-1]
self.max_grad_norm = kwargs.get('max_grad_norm', 100)
self.optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
# self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-2)
self.num_layer = kwargs.get('num_layer', 1)
self.rnn_class = kwargs.get('rnn_class', tf.nn.rnn_cell.BasicLSTMCell)
# self.rnn_class = kwargs.get('rnn_class', tf.nn.rnn_cell.GRUCell)
self.encoder = Encoder(self.enc_vsize, self.enc_esize, self.enc_hsize,
rnn_class=self.rnn_class, num_layer = self.num_layer)
if kwargs.get('tree_decoder', False):
self.decoder = TreeDecoder(self.dec_isize, self.dec_hsize, self.dec_msize, self.dec_asize, self.max_len,
rnn_class=self.rnn_class, num_layer = self.num_layer, epsilon=1.0)
else:
self.decoder = Decoder(self.dec_isize, self.dec_hsize, self.dec_msize, self.dec_asize, self.max_len,
rnn_class=self.rnn_class, num_layer = self.num_layer, epsilon=1.0)
self.baselines = []
self.bl_ratio = kwargs.get('bl_ratio', 0.95)
for i in range(self.max_len):
self.baselines.append(tf.Variable(0.0, trainable=False))
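The per-timestep baseline variables created above are typically refreshed with an exponential moving average controlled by bl_ratio; the update op in the sketch below is an assumption made for illustration, not code taken from this model.
import tensorflow as tf

bl_ratio = 0.95
baseline = tf.Variable(0.0, trainable=False)   # one non-trainable baseline per timestep
reward = tf.placeholder(tf.float32, shape=[])  # sampled return for that timestep
# exponential moving average: new_baseline = ratio * old_baseline + (1 - ratio) * reward
update_baseline = tf.assign(baseline, bl_ratio * baseline + (1.0 - bl_ratio) * reward)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for r in [1.0, 0.0, 1.0]:
        print(sess.run(update_baseline, feed_dict={reward: r}))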
def __init__(self, z_dim, batch_size):
self.z_dim = z_dim
self.batch_size = batch_size
self.lr = 0.0001
self.gamma = 0.5
# -- encoder -------
self.enc = Encoder([3, 64, 128, 256], 2048, z_dim)
# -- decoder -------
self.dec = Decoder(z_dim, [256, 128, 32, 3])
# -- discriminator --
self.disc = Discriminator([3, 32, 128, 256, 256], 512)
def ParseFields(input_stream):
delta_bytes = 0
total_bytes = 0
total_raw_bytes = 0
d = Decoder(input_stream)
fields = {}
while True:
try:
(field_num, wire_type) = d.ReadFieldNumberAndWireType()
except EOFError:
break
field_encoder = Encoder()  # encode the bytes back
tag_bytes = d.Position() - total_bytes
if wire_type == WIRETYPE_VARINT:
uint64_value = d.ReadUInt64()
field_encoder.AppendUInt64(uint64_value)
value = uint64_value
elif wire_type == WIRETYPE_FIXED64:
value = d.ReadDouble()
field_encoder.AppendDouble(value)
elif wire_type == WIRETYPE_FIXED32:
value = d.ReadFloat()
field_encoder.AppendFloat(value)
elif wire_type == WIRETYPE_LENGTH_DELIMITED:
value = d.ReadString()
field_encoder.AppendString(value)
else:
raise IOError('unsupported wire type: {}'.format(wire_type))
delta_bytes = d.Position() - total_bytes
total_bytes = d.Position()
raw_bytes = delta_bytes - tag_bytes
total_raw_bytes += raw_bytes
fields[field_num] = ProtoField(field_num, wire_type, value, field_encoder.RawBuffer())
return fields
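A minimal usage sketch for ParseFields, assuming the Decoder and Encoder helpers above read from a file-like object holding a serialized protobuf message; the io.BytesIO wrapper, the example bytes, and the ProtoField attribute names used below are illustrative assumptions rather than details confirmed by this snippet.
import io

# 0x08 0x96 0x01 encodes field 1 (wire type varint) with value 150
fields = ParseFields(io.BytesIO(b'\x08\x96\x01'))
for num, field in fields.items():
    # assumed ProtoField attributes mirroring its constructor arguments
    print(num, field.wire_type, field.value)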
def find_oep(insBytes):
"""
Finds the original entry point of a code object obfuscated by PjOrion.
If the entrypoint does not match a predefined signature it returns 0.
:param insBytes: the code object
:type insBytes: bytearray
:returns: the entrypoint
:rtype: int
"""
dec = Decoder(insBytes)
ins = dec.decode_at(0)
try:
# First instruction sets up an exception handler
assert ins.mnemonic == 'SETUP_EXCEPT'
# Get location of exception handler
exc_handler = 0 + ins.arg + ins.size
# Second instruction is intentionally invalid, on execution
# control transfers to exception handler
assert not dec.decode_at(3).is_opcode_valid()
assert dec.decode_at(exc_handler).mnemonic == 'POP_TOP'
assert dec.decode_at(exc_handler + 1).mnemonic == 'POP_TOP'
assert dec.decode_at(exc_handler + 2).mnemonic == 'POP_TOP'
logger.debug('Code entrypoint matched PjOrion signature v1')
oep = exc_handler + 3
except Exception:
if ins.mnemonic == 'JUMP_FORWARD':
oep = 0 + ins.arg + ins.size
logger.debug('Code entrypoint matched PjOrion signature v2')
elif ins.mnemonic == 'JUMP_ABSOLUTE':
oep = ins.arg
logger.debug('Code entrypoint matched PjOrion signature v2')
else:
logger.warning('Code entrypoint did not match PjOrion signature')
oep = 0
return oep
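A short usage sketch, assuming find_oep receives the raw bytecode of a Python 2 code object as a bytearray; the sample function below is only an illustration and, not being obfuscated, will simply not match any PjOrion signature.
def sample():
    return 1

oep = find_oep(bytearray(sample.__code__.co_code))
print('entrypoint offset: {}'.format(oep))  # 0 for ordinary, non-obfuscated code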
def construct_basic_blocks(self):
"""
Once we have obtained the leaders, i.e. the boundaries where a basic block may start or end,
we build the basic blocks by pairing consecutive leaders. A basic block spans from a start leader
up to the immediately following end leader, as determined by their addresses.
"""
logger.debug('Constructing basic blocks...')
idx = 0
dec = Decoder(self.insBytes)
while idx < len(self.leaders):
# Get a pair of leaders
leader1, leader2 = self.leaders[idx], self.leaders[idx + 1]
# Get the addresses of the respective leaders
addr1, addr2 = leader1.address, leader2.address
# Create a new basic block
bb = BasicBlock()
# Set the address of the basic block
bb.address = addr1
# The offset variable is used to track the position of individual instructions within the basic block
offset = 0
# Add the basic block to the graph, marking the block at the entrypoint separately
if addr1 == self.entrypoint:
self.bb_graph.add_node(bb, isEntry=True)
else:
self.bb_graph.add_node(bb)
# Leader1 is a start leader, leader2 is an end leader
# All instructions inclusive of leader1 and leader2 are part of this basic block
if leader1.type == 'S' and leader2.type == 'E':
logger.debug(
'Creating basic block {} spanning from {} to {}, both inclusive'.format(hex(id(bb)),
leader1.address,
leader2.address))
while addr1 + offset <= addr2:
ins = dec.decode_at(addr1 + offset)
bb.add_instruction(ins)
offset += ins.size
idx += 2
# Both leader1 and leader2 are start leaders
# Instructions inclusive of leader1 but exclusive of leader2 are part of this basic block
elif leader1.type == 'S' and leader2.type == 'S':
logger.debug(
'Creating basic block {} spanning from {} to {}, end exclusive'.format(hex(id(bb)), leader1.address,
leader2.address))
while addr1 + offset < addr2:
ins = dec.decode_at(addr1 + offset)
bb.add_instruction(ins)
offset += ins.size
idx += 1
logger.debug('{} basic blocks created'.format(self.bb_graph.number_of_nodes()))
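The leader-pairing rule described in the docstring can be illustrated in isolation; the Leader namedtuple and the addresses below are stand-ins for this sketch, not the structures used elsewhere in this codebase.
from collections import namedtuple

Leader = namedtuple('Leader', ['address', 'type'])  # 'S' = start leader, 'E' = end leader

# sorted leaders: an (S, E) pair forms a block covering both addresses inclusively,
# an (S, S) pair forms a block that stops just before the second start leader
leaders = [Leader(0, 'S'), Leader(9, 'E'), Leader(12, 'S'), Leader(21, 'S')]

idx, spans = 0, []
while idx < len(leaders) - 1:
    l1, l2 = leaders[idx], leaders[idx + 1]
    if l1.type == 'S' and l2.type == 'E':
        spans.append((l1.address, l2.address, 'both inclusive'))
        idx += 2
    else:
        spans.append((l1.address, l2.address, 'end exclusive'))
        idx += 1
print(spans)  # [(0, 9, 'both inclusive'), (12, 21, 'end exclusive')]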
def main(unused_argv):
vocab = dataset.Vocab(FLAGS.vocab_path, 200000)
# Check for presence of required special tokens.
assert vocab.tokenToId(dataset.PAD_TOKEN) > 0
assert vocab.tokenToId(dataset.UNKNOWN_TOKEN) > 0
assert vocab.tokenToId(dataset.SENTENCE_START) > 0
assert vocab.tokenToId(dataset.SENTENCE_END) > 0
assert vocab.tokenToId(dataset.WORD_BEGIN) > 0
assert vocab.tokenToId(dataset.WORD_CONTINUE) > 0
assert vocab.tokenToId(dataset.WORD_END) > 0
params = selector.parameters(
mode=FLAGS.mode, # train, eval, decode
min_lr=0.01, # min learning rate.
lr=0.1, # learning rate
batch_size=1,
c_timesteps=600, # context length
q_timesteps=30, # question length
min_input_len=2, # discard examples whose context or question has fewer words than this
hidden_size=200, # for rnn cell and embedding
emb_size=200, # If 0, don't use embedding
max_decode_steps=4,
maxout_size=32,
max_grad_norm=2)
batcher = batch_reader.Generator(
FLAGS.data_path, vocab, params,
FLAGS.context_key, FLAGS.question_key, FLAGS.answer_key,
FLAGS.max_context_sentences, FLAGS.max_question_sentences,
bucketing=FLAGS.use_bucketing, truncate_input=FLAGS.truncate_input)
tf.set_random_seed(FLAGS.random_seed)
if params.mode == 'train':
model = selector.Model(
params, len(vocab), num_cpus=FLAGS.num_cpus, num_gpus=FLAGS.num_gpus)
_train(model, batcher)
elif params.mode == 'eval':
model = selector.Model(
params, len(vocab), num_cpus=FLAGS.num_cpus, num_gpus=FLAGS.num_gpus)
_eval(model, batcher)
elif params.mode == 'decode':
model = selector.Model(
params, len(vocab), num_cpus=FLAGS.num_cpus, num_gpus=FLAGS.num_gpus)
machine = decoder.Decoder(model, batcher, params, vocab)
machine.loop()
def decode(self, reader, writer):
'''
compute pseudo-likelihoods for the test set
Args:
reader: a feature reader object to read features to decode
writer: a writer object to write likelihoods
'''
#create a decoder
decoder = Decoder(self.dnn, self.input_dim, reader.max_input_length)
#read the prior
prior = np.load(self.conf['savedir'] + '/prior.npy')
#start tensorflow session
config = tf.ConfigProto()
config.gpu_options.allow_growth = True #pylint: disable=E1101
with tf.Session(graph=decoder.graph, config=config):
#load the model
decoder.restore(self.conf['savedir'] + '/final')
#feed the utterances one by one to the neural net
while True:
utt_id, utt_mat, looped = reader.get_utt()
if looped:
break
#compute predictions
output = decoder(utt_mat)
#get state likelihoods by dividing by the prior
output = output/prior
#floor the values to avoid problems with log
output = np.where(output == 0, np.finfo(float).eps, output)
#write the pseudo-likelihoods in kaldi feature format
writer.write_next_utt(utt_id, np.log(output))
#close the writer
writer.close()
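The likelihood computation at the end of decode() is plain element-wise NumPy; this small sketch with made-up arrays shows the same divide, floor and log sequence, including the assignment fix applied above.
import numpy as np

posteriors = np.array([[0.7, 0.3, 0.0],
                       [0.1, 0.6, 0.3]])   # network outputs per frame
prior = np.array([0.5, 0.3, 0.2])          # state priors estimated on the training data

likelihood = posteriors / prior                                          # pseudo-likelihoods
likelihood = np.where(likelihood == 0, np.finfo(float).eps, likelihood)  # avoid log(0)
print(np.log(likelihood))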
def __call__(self, html):
return web2pyHTMLParser(decoder.decoder(html)).tree