def exe_rnn(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE
    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')
    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)
    layer_rnn = RecurrentLayer(layer_input, num_units, nonlinearity=nonlinearities.tanh, only_return_final=True,
                               W_in_to_hid=lasagne.init.GlorotUniform(), W_hid_to_hid=lasagne.init.GlorotUniform(),
                               b=lasagne.init.Constant(0.), name='RNN')
    # W = layer_rnn.W_hid_to_hid.sum()
    # U = layer_rnn.W_in_to_hid.sum()
    # b = layer_rnn.b.sum()
    layer_output = DenseLayer(layer_rnn, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')
    return train(layer_output, layer_rnn, input_var, target_var, batch_size, length, position, binominal)
def build_BiRNN_CNN(incoming1, incoming2, num_units, mask=None, grad_clipping=0, nonlinearity=nonlinearities.tanh,
                    precompute_input=True, num_filters=20, dropout=True, in_to_out=False):
    # first get some necessary dimensions or parameters
    conv_window = 3
    _, sent_length, _ = incoming2.output_shape
    # dropout before cnn?
    if dropout:
        incoming1 = lasagne.layers.DropoutLayer(incoming1, p=0.5)
    # construct convolution layer
    cnn_layer = lasagne.layers.Conv1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                           nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size (the pool should span all time steps of the cnn output)
    _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer, pool_size=pool_size)
    # reshape to match the rnn incoming layer: [batch * sent_length, num_filters, 1] --> [batch, sent_length, num_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, (-1, sent_length, [1]))
    # finally, concatenate the two incoming layers together.
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)
    return build_BiRNN(incoming, num_units, mask=mask, grad_clipping=grad_clipping, nonlinearity=nonlinearity,
                       precompute_input=precompute_input, dropout=dropout, in_to_out=in_to_out)
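# --- Usage sketch (added; not from the original repo) ---------------------------
# The shape contract implied by build_BiRNN_CNN is that `incoming1` feeds a
# Conv1DLayer, i.e. it must be (batch * sent_length, char_dim, max_char_len),
# while `incoming2` is the word-level input of shape (batch, sent_length, word_dim).
# All sizes and the character-embedding construction below are illustrative assumptions.
import theano.tensor as T
import lasagne

SENT_LEN, MAX_CHAR_LEN = 40, 20
CHAR_DIM, WORD_DIM, NUM_UNITS = 30, 100, 200

char_var = T.itensor3('chars')   # (batch, sent_length, max_char_len) of character ids
word_var = T.tensor3('words')    # (batch, sent_length, word_dim)

l_char = lasagne.layers.InputLayer((None, SENT_LEN, MAX_CHAR_LEN), char_var)
l_char = lasagne.layers.EmbeddingLayer(l_char, input_size=5000, output_size=CHAR_DIM)
# (batch, sent_len, max_char_len, char_dim) -> (batch * sent_len, char_dim, max_char_len)
l_char = lasagne.layers.reshape(l_char, (-1, MAX_CHAR_LEN, CHAR_DIM))
l_char = lasagne.layers.DimshuffleLayer(l_char, (0, 2, 1))

l_word = lasagne.layers.InputLayer((None, SENT_LEN, WORD_DIM), word_var)
bi_rnn = build_BiRNN_CNN(l_char, l_word, NUM_UNITS)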
def build_BiLSTM_CNN(incoming1, incoming2, num_units, mask=None, grad_clipping=0, precompute_input=True,
                     peepholes=False, num_filters=20, dropout=True, in_to_out=False):
    # first get some necessary dimensions or parameters
    conv_window = 3
    _, sent_length, _ = incoming2.output_shape
    # dropout before cnn?
    if dropout:
        incoming1 = lasagne.layers.DropoutLayer(incoming1, p=0.5)
    # construct convolution layer
    cnn_layer = lasagne.layers.Conv1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                           nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size (the pool should span all time steps of the cnn output)
    _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer, pool_size=pool_size)
    # reshape to match the lstm incoming layer: [batch * sent_length, num_filters, 1] --> [batch, sent_length, num_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, (-1, sent_length, [1]))
    # finally, concatenate the two incoming layers together.
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)
    return build_BiLSTM(incoming, num_units, mask=mask, grad_clipping=grad_clipping, peepholes=peepholes,
                        precompute_input=precompute_input, dropout=dropout, in_to_out=in_to_out)
def __init__(self):
    self.hdn = None
    self.rep = None
    self.fname_in = None
    self.mod2size = None
    self.mod1size = None
    self.fname_out = None
    self.lr = 0.1
    self.act = "tanh"
    self.epochs = 1000
    self.verbosity = 3
    self.dropout = 0.2
    self.momentum = 0.9
    self.untied = False
    self.l2_norm = False
    self.batch_size = 128
    self.load_model = None
    self.save_model = None
    self.write_after = None
    self.crossmodal = False
    self.exec_command = None
    self.ignore_zeroes = False
def setup_transform_net(self, input_var=None):
    transform_net = InputLayer(shape=self.shape, input_var=input_var)
    transform_net = style_conv_block(transform_net, self.num_styles, 32, 9, 1)
    transform_net = style_conv_block(transform_net, self.num_styles, 64, 3, 2)
    transform_net = style_conv_block(transform_net, self.num_styles, 128, 3, 2)
    for _ in range(5):
        transform_net = residual_block(transform_net, self.num_styles)
    transform_net = nn_upsample(transform_net, self.num_styles)
    transform_net = nn_upsample(transform_net, self.num_styles)

    if self.net_type == 0:
        transform_net = style_conv_block(transform_net, self.num_styles, 3, 9, 1, tanh)
        transform_net = ExpressionLayer(transform_net, lambda X: 150. * X, output_shape=None)
    elif self.net_type == 1:
        transform_net = style_conv_block(transform_net, self.num_styles, 3, 9, 1, sigmoid)

    self.network['transform_net'] = transform_net
def build_cnn(self):
    # Building the network
    layer_in = InputLayer(shape=(None, 784), input_var=self.input_var)

    # Hidden layer
    layer = DenseLayer(
        layer_in,
        num_units=self.hidden_size,
        W=lasagne.init.Uniform(
            range=(-np.sqrt(6. / (784 + self.hidden_size)),
                   np.sqrt(6. / (784 + self.hidden_size)))),
        nonlinearity=tanh,
    )

    # LR layer
    layer = DenseLayer(
        layer,
        num_units=self.output_size,
        W=lasagne.init.Constant(0.),
        nonlinearity=softmax,
    )

    return layer
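# --- Side note (added): the explicit Uniform range above is exactly the
# Glorot/Xavier bound sqrt(6 / (fan_in + fan_out)), so lasagne.init.GlorotUniform()
# would produce the same initialization. A quick check, using an assumed
# hidden size of 500 for illustration:
import numpy as np
import lasagne

fan_in, fan_out = 784, 500
bound = np.sqrt(6. / (fan_in + fan_out))
W_manual = lasagne.init.Uniform(range=(-bound, bound)).sample((fan_in, fan_out))
W_glorot = lasagne.init.GlorotUniform().sample((fan_in, fan_out))
# Both are draws from U(-bound, bound); GlorotUniform just derives the bound from the shape.
print(bound, W_manual.min(), W_glorot.min())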
def build_multi_dssm(input_var=None, num_samples=None, num_entries=6, num_ngrams=42**3, num_hid1=300, num_hid2=300, num_out=128):
    """Builds a DSSM structure in a Lasagne/Theano way.

    The built DSSM is the neural network that computes the projection of only one paper.
    The input ``input_var`` should have two dimensions: (``num_samples * num_entries``, ``num_ngrams``).
    The output is then computed in a batch way: one paper at a time, but all papers from the same sample
    in the dataset are grouped (cited papers, citing papers and ``num_entries - 2`` irrelevant papers).

    Args:
        input_var (:class:`theano.tensor.TensorType` or None): symbolic input variable of the DSSM
        num_samples (int): the number of samples in the batch input dataset (number of rows)
        num_entries (int): the number of compared papers in the DSSM structure
        num_ngrams (int): the size of the vocabulary
        num_hid1 (int): the number of units in the first hidden layer
        num_hid2 (int): the number of units in the second hidden layer
        num_out (int): the number of units in the output layer

    Returns:
        :class:`lasagne.layers.Layer`: the output layer of the DSSM
    """
    assert num_entries > 2

    # Initialise the input layer
    if num_samples is None:
        num_rows = None
    else:
        num_rows = num_samples * num_entries
    l_in = layers.InputLayer(shape=(num_rows, num_ngrams), input_var=input_var)

    # Initialise the hidden and output layers of the DSSM
    l_hid1 = layers.DenseLayer(l_in, num_units=num_hid1, nonlinearity=nonlinearities.tanh, W=init.GlorotUniform())
    l_hid2 = layers.DenseLayer(l_hid1, num_units=num_hid2, nonlinearity=nonlinearities.tanh, W=init.GlorotUniform())
    l_out = layers.DenseLayer(l_hid2, num_units=num_out, nonlinearity=nonlinearities.tanh, W=init.GlorotUniform())
    l_out = layers.ExpressionLayer(l_out, lambda X: X / X.norm(2), output_shape='auto')

    return l_out
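# --- Usage sketch (added; not part of the original code) ------------------------
# Build the projection network and compile a function that maps n-gram vectors to
# their DSSM embeddings. The small num_ngrams value is an assumption to keep the
# sketch lightweight; the function's default is 42**3.
import theano
import theano.tensor as T
from lasagne import layers

input_var = T.matrix('papers')      # (num_samples * num_entries, num_ngrams)
l_out = build_multi_dssm(input_var, num_samples=None, num_entries=6, num_ngrams=5000)
project = theano.function([input_var], layers.get_output(l_out, deterministic=True))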
def __init__(
        self,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        num_seq_inputs=1,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params
        are ignored
    :return:
    """
    Serializable.quick_init(self, locals())

    assert isinstance(env_spec.action_space, Discrete)

    if prob_network is None:
        prob_network = MLP(
            input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function([prob_network.input_layer.input_var], L.get_output(
        prob_network.output_layer))

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])
def exe_maxru(length, num_units, position, binominal):
    batch_size = BATCH_SIZE
    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')
    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')

    time_updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    time_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                       b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)
    resetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                     W_cell=lasagne.init.GlorotUniform())
    updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                      W_cell=lasagne.init.GlorotUniform())
    hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_taru = MAXRULayer(layer_input, num_units, max_length=length,
                            P_time=lasagne.init.GlorotUniform(), nonlinearity=nonlinearities.tanh,
                            resetgate=resetgate, updategate=updategate, hidden_update=hiden_update,
                            time_updategate=time_updategate, time_update=time_update,
                            only_return_final=True, name='MAXRU', p=0.)
    # W = layer_taru.W_hid_to_hidden_update.sum()
    # U = layer_taru.W_in_to_hidden_update.sum()
    # b = layer_taru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_taru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')
    return train(layer_output, input_var, target_var, batch_size, length, position, binominal)
def exe_lstm(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE
    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')
    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    ingate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                  W_cell=lasagne.init.Uniform(range=0.1))
    outgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                   W_cell=lasagne.init.Uniform(range=0.1))
    # following Jozefowicz et al. (2015), initialize the forget-gate bias to 1.
    forgetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                      W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # use tanh as the cell nonlinearity for now; a purely linear cell is still worth trying.
    cell = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_lstm = LSTMLayer(layer_input, num_units, ingate=ingate, forgetgate=forgetgate, cell=cell, outgate=outgate,
                           peepholes=False, nonlinearity=nonlinearities.tanh, only_return_final=True, name='LSTM')
    # W = layer_lstm.W_hid_to_cell.sum()
    # U = layer_lstm.W_in_to_cell.sum()
    # b = layer_lstm.b_cell.sum()

    layer_output = DenseLayer(layer_lstm, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')
    return train(layer_output, layer_lstm, input_var, target_var, batch_size, length, position, binominal)
def exe_sgru(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE
    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')
    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    resetgate_input = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_sgru = SGRULayer(layer_input, num_units, resetgate_input=resetgate_input, resetgate_hidden=resetgate_hidden,
                           updategate=updategate, hidden_update=hiden_update, only_return_final=True, name='SGRU')
    # W = layer_gru.W_hid_to_hidden_update.sum()
    # U = layer_gru.W_in_to_hidden_update.sum()
    # b = layer_gru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_sgru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')
    return train(layer_output, layer_sgru, input_var, target_var, batch_size, length, position, binominal)
def __init__(self, incoming, num_units, ingate=Gate(), forgetgate=Gate(),
             cell=Gate(W_cell=None, nonlinearity=nonlinearities.tanh), outgate=Gate(),
             nonlinearity=nonlinearities.tanh, cell_init=init.Constant(0.), hid_init=init.Constant(0.),
             backwards=False, learn_init=False, peepholes=True, gradient_steps=-1, grad_clipping=0,
             unroll_scan=False, precompute_input=True, mask_input=None, **kwargs):
    super(CustomLSTMEncoder, self).__init__(incoming, num_units, ingate, forgetgate, cell, outgate, nonlinearity,
                                            cell_init, hid_init, backwards, learn_init, peepholes, gradient_steps,
                                            grad_clipping, unroll_scan, precompute_input, mask_input, False,
                                            **kwargs)
def build_BiRNN(incoming, num_units, mask=None, grad_clipping=0, nonlinearity=nonlinearities.tanh,
                precompute_input=True, dropout=True, in_to_out=False):
    # construct the forward and backward rnns. Here the weight matrices are initialized with
    # GlorotUniform; other initializers may be worth trying for specific tasks.

    # dropout for incoming
    if dropout:
        incoming = lasagne.layers.DropoutLayer(incoming, p=0.5)

    rnn_forward = lasagne.layers.RecurrentLayer(incoming, num_units,
                                                mask_input=mask, grad_clipping=grad_clipping,
                                                nonlinearity=nonlinearity, precompute_input=precompute_input,
                                                W_in_to_hid=lasagne.init.GlorotUniform(),
                                                W_hid_to_hid=lasagne.init.GlorotUniform(), name='forward')
    rnn_backward = lasagne.layers.RecurrentLayer(incoming, num_units,
                                                 mask_input=mask, grad_clipping=grad_clipping,
                                                 nonlinearity=nonlinearity, precompute_input=precompute_input,
                                                 W_in_to_hid=lasagne.init.GlorotUniform(),
                                                 W_hid_to_hid=lasagne.init.GlorotUniform(), backwards=True,
                                                 name='backward')

    # concatenate the outputs of the forward and backward RNNs to combine them.
    concat = lasagne.layers.concat([rnn_forward, rnn_backward], axis=2, name="bi-rnn")

    # dropout for output
    if dropout:
        concat = lasagne.layers.DropoutLayer(concat, p=0.5)
    if in_to_out:
        concat = lasagne.layers.concat([concat, incoming], axis=2)

    # the shape of the BiRNN output (concat) is (batch_size, input_length, 2 * num_hidden_units)
    return concat
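# --- Usage sketch (added; names and sizes are assumptions) ----------------------
# Feed a padded batch and a 0/1 mask into build_BiRNN and read out features of
# shape (batch, seq_len, 2 * num_units).
import theano.tensor as T
import lasagne

MAX_LEN, FEAT_DIM, NUM_UNITS = 50, 100, 200
input_var = T.tensor3('x')      # (batch, MAX_LEN, FEAT_DIM)
mask_var = T.matrix('mask')     # (batch, MAX_LEN): 1 for real tokens, 0 for padding

l_in = lasagne.layers.InputLayer((None, MAX_LEN, FEAT_DIM), input_var)
l_mask = lasagne.layers.InputLayer((None, MAX_LEN), mask_var)
bi_rnn = build_BiRNN(l_in, NUM_UNITS, mask=l_mask)
features = lasagne.layers.get_output(bi_rnn)   # (batch, MAX_LEN, 2 * NUM_UNITS)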
def build_BiLSTM_HighCNN(incoming1, incoming2, num_units, mask=None, grad_clipping=0, precompute_input=True,
                         peepholes=False, num_filters=20, dropout=True, in_to_out=False):
    # first get some necessary dimensions or parameters
    conv_window = 3
    _, sent_length, _ = incoming2.output_shape

    # dropout before cnn
    if dropout:
        incoming1 = lasagne.layers.DropoutLayer(incoming1, p=0.5)

    # construct convolution layer
    cnn_layer = lasagne.layers.Conv1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                           nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size (the pool should span all time steps of the cnn output)
    _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer, pool_size=pool_size)
    # reshape to match the highway incoming layer: [batch * sent_length, num_filters, 1] --> [batch * sent_length, num_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], -1))

    # dropout after cnn?
    # if dropout:
    #     output_cnn_layer = lasagne.layers.DropoutLayer(output_cnn_layer, p=0.5)

    # construct highway layer
    highway_layer = HighwayDenseLayer(output_cnn_layer, nonlinearity=nonlinearities.rectify)

    # reshape to match the lstm incoming layer: [batch * sent_length, num_filters] --> [batch, sent_length, num_filters]
    output_highway_layer = lasagne.layers.reshape(highway_layer, (-1, sent_length, [1]))

    # finally, concatenate the two incoming layers together.
    incoming = lasagne.layers.concat([output_highway_layer, incoming2], axis=2)

    return build_BiLSTM(incoming, num_units, mask=mask, grad_clipping=grad_clipping, peepholes=peepholes,
                        precompute_input=precompute_input, dropout=dropout, in_to_out=in_to_out)
def __init__(self, incomings, hid_state_size, voc_size,
             resetgate=GRU_Gate(), updategate=GRU_Gate(),
             hid_update=GRU_Gate(nonlinearity=nonlin.tanh),
             W=Normal(), max_answer_word=1, **kwargs):
    super(AnswerModule, self).__init__(incomings, **kwargs)

    self.hid_state_size = hid_state_size

    # FOR GRU
    input_shape = self.input_shapes[0]
    num_inputs = np.prod(input_shape[1]) + voc_size  # concatenation of previous prediction

    def add_gate(gate, gate_name):
        return (self.add_param(gate.W_in, (num_inputs, hid_state_size),
                               name="W_in_to_{}".format(gate_name)),
                self.add_param(gate.W_hid, (hid_state_size, hid_state_size),
                               name="W_hid_to_{}".format(gate_name)),
                self.add_param(gate.b, (hid_state_size,),
                               name="b_{}".format(gate_name), regularizable=False),
                gate.nonlinearity)

    # Add in all parameters from gates
    (self.W_in_to_updategate,
     self.W_hid_to_updategate,
     self.b_updategate,
     self.nonlinearity_updategate) = add_gate(updategate, 'updategate')
    (self.W_in_to_resetgate,
     self.W_hid_to_resetgate,
     self.b_resetgate,
     self.nonlinearity_resetgate) = add_gate(resetgate, 'resetgate')
    (self.W_in_to_hid_update,
     self.W_hid_to_hid_update,
     self.b_hid_update,
     self.nonlinearity_hid) = add_gate(hid_update, 'hid_update')

    self.W = self.add_param(W, (hid_state_size, voc_size), name="W")
    self.max_answer_word = max_answer_word

    self.rand_stream = RandomStreams(np.random.randint(1, 2147462579))
def run_task(*_):
    trpo_stepsize = 0.01
    trpo_subsample_factor = 0.2

    env = PointGatherEnv(apple_reward=10, bomb_cost=1, n_apples=2, activity_range=6)

    policy = GaussianMLPPolicy(env.spec,
                               hidden_sizes=(64, 32)
                               )

    baseline = GaussianMLPBaseline(
        env_spec=env.spec,
        regressor_args={
            'hidden_sizes': (64, 32),
            'hidden_nonlinearity': NL.tanh,
            'learn_std': False,
            'step_size': trpo_stepsize,
            'optimizer': ConjugateGradientOptimizer(subsample_factor=trpo_subsample_factor)
        }
    )

    safety_constraint = GatherSafetyConstraint(max_value=0.1)

    algo = PDO(
        env=env,
        policy=policy,
        baseline=baseline,
        safety_constraint=safety_constraint,
        batch_size=50000,
        max_path_length=15,
        n_itr=100,
        gae_lambda=0.95,
        discount=0.995,
        step_size=trpo_stepsize,
        optimizer_args={'subsample_factor': trpo_subsample_factor},
        # plot=True,
    )

    algo.train()
def get_output_for(self, arguments, **kwargs):
    input, hprev, Cprev = arguments

    i = nl.sigmoid(self.Wi * input + self.Ui * hprev + self.bi)
    cand = nl.tanh(self.Wc * input + self.Uc * hprev + self.bc)
    f = nl.sigmoid(self.Wf * input + self.Uf * hprev + self.bf)
    C = i * cand + f * Cprev
    o = nl.sigmoid(self.Wo * input + self.Uo * hprev + self.Vo * C + self.bo)
    h = o * nl.tanh(C)

    return h, C
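# --- Reference sketch (added) ---------------------------------------------------
# The same peephole-style LSTM step written in plain NumPy, assuming the W*/U*
# products above are intended as matrix multiplications with (input_dim x units)
# and (units x units) weights. Parameter names mirror the attributes used above.
import numpy as np

def np_sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x, h_prev, c_prev, p):
    """One step: p maps 'Wi', 'Ui', 'bi', 'Wc', 'Uc', 'bc', 'Wf', 'Uf', 'bf', 'Wo', 'Uo', 'Vo', 'bo' to arrays."""
    i = np_sigmoid(x @ p['Wi'] + h_prev @ p['Ui'] + p['bi'])                 # input gate
    cand = np.tanh(x @ p['Wc'] + h_prev @ p['Uc'] + p['bc'])                 # candidate cell
    f = np_sigmoid(x @ p['Wf'] + h_prev @ p['Uf'] + p['bf'])                 # forget gate
    c = i * cand + f * c_prev                                                # new cell state
    o = np_sigmoid(x @ p['Wo'] + h_prev @ p['Uo'] + c @ p['Vo'] + p['bo'])   # output gate with peephole on c
    h = o * np.tanh(c)                                                       # new hidden state
    return h, c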
def build_combination(input_var, output_nodes, input_size, stocks, period, feature_types):
    # Input layer
    input_layer = InputLayer(shape=(None, 1, input_size), input_var=input_var)
    assert input_size == stocks * period * feature_types
    input_layer = ReshapeLayer(input_layer, ([0], stocks, period, feature_types))

    # slice for partition
    stock_feature_type_layers = []
    for ix in range(stocks):
        stock_layer = SliceLayer(input_layer, indices=ix, axis=1)
        this_stock_feature_type_layers = []
        for rx in range(feature_types):
            this_stock_feature_type_layers.append(SliceLayer(stock_layer, indices=rx, axis=1))
        stock_feature_type_layers.append(this_stock_feature_type_layers)

    stock_networks = []
    for this_stock_feature_type_layers in stock_feature_type_layers:
        this_stock_networks = []
        for feature_type_layer in this_stock_feature_type_layers:
            tmp = DenseLayer(dropout(feature_type_layer, p=.2),
                             num_units=10, nonlinearity=tanh)
            tmp = DenseLayer(dropout(tmp, p=.5), num_units=1, nonlinearity=tanh)
            this_stock_networks.append(tmp)

        this_stock_network = ConcatLayer(this_stock_networks)
        stock_network = DenseLayer(dropout(this_stock_network, p=.5),
                                   num_units=1, nonlinearity=tanh)
        stock_networks.append(stock_network)

    network = ConcatLayer(stock_networks)
    network = DenseLayer(dropout(network, p=.5),
                         num_units=output_nodes, nonlinearity=sigmoid)

    return network, stock_networks
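# --- Usage sketch (added; sizes and output_nodes are assumptions) ---------------
# The flat input of build_combination must unpack as stocks x period x feature_types,
# fed as a (batch, 1, input_size) tensor.
import theano
import theano.tensor as T
from lasagne.layers import get_output

stocks, period, feature_types = 3, 10, 4
input_size = stocks * period * feature_types

input_var = T.tensor3('x')   # (batch, 1, input_size)
network, stock_networks = build_combination(input_var, output_nodes=stocks,
                                            input_size=input_size, stocks=stocks,
                                            period=period, feature_types=feature_types)
predict = theano.function([input_var], get_output(network, deterministic=True))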
def __init__(self, incoming, num_units,
             ingate=Gate(),
             forgetgate=Gate(),
             cell=Gate(W_cell=None, nonlinearity=nonlinearities.tanh),
             outgate=Gate(),
             nonlinearity=nonlinearities.tanh,
             cell_init=init.Constant(0.),
             hid_init=init.Constant(0.),
             backwards=False,
             learn_init=False,
             peepholes=True,
             gradient_steps=-1,
             grad_clipping=0,
             unroll_scan=False,
             precompute_input=True,
             mask_input=None,
             only_return_final=False,
             inter_drop=0.05,
             **kwargs):
    super(DropLSTMLayer, self).__init__(incoming, num_units,
                                        ingate, forgetgate, cell, outgate,
                                        nonlinearity, cell_init, hid_init,
                                        backwards, learn_init, peepholes,
                                        gradient_steps, grad_clipping, unroll_scan,
                                        precompute_input, mask_input, only_return_final, **kwargs)
    self.inter_retain_prob = 1 - inter_drop
    self._srng = RandomStreams(
        lasagne.random.get_rng().randint(1, 2147462579))
def generate_lstm_parameters():
    gate_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)
    return gate_parameters, cell_parameters
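# --- Usage sketch (added; shapes are assumptions) -------------------------------
# Plug the shared gate/cell parameter specs into an LSTMLayer. A single Gate object
# can be reused across gates: it only carries initializers, and the layer creates
# separate parameters for each gate.
import theano.tensor as T
from lasagne.layers import InputLayer, LSTMLayer

gate_parameters, cell_parameters = generate_lstm_parameters()
l_in = InputLayer((None, None, 100), T.tensor3('x'))   # (batch, seq_len, features)
l_lstm = LSTMLayer(
    l_in, num_units=250,
    ingate=gate_parameters, forgetgate=gate_parameters,
    cell=cell_parameters, outgate=gate_parameters,
    learn_init=True, grad_clipping=100.)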
def create_model(input_shape, input_var, mask_shape, mask_var, lstm_size=250, output_classes=26,
                 w_init=las.init.Orthogonal()):
    gate_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    f_lstm, b_lstm = create_blstm(l_in, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm')
    l_sum = ElemwiseSumLayer([f_lstm, b_lstm], name='sum')
    l_forward_slice1 = SliceLayer(l_sum, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we use a dense layer with one unit per output class.
    l_out = DenseLayer(
        l_forward_slice1, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='output')

    return l_out
def create_model(input_shape, input_var, mask_shape, mask_var, window, lstm_size=250, output_classes=26,
                 w_init=las.init.GlorotUniform(), use_peepholes=False, use_blstm=True):
    gate_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, name='mask')
    symbolic_seqlen = l_in.input_var.shape[1]

    l_delta = DeltaLayer(l_in, window, name='delta')

    if use_blstm:
        f_lstm, b_lstm = create_blstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_sum = ElemwiseSumLayer([f_lstm, b_lstm], name='sum')
        # reshape to (num_examples * seq_len, lstm_size)
        l_reshape = ReshapeLayer(l_sum, (-1, lstm_size), name='reshape')
    else:
        l_lstm = create_lstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_reshape = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we use a dense layer with one unit per output class.
    l_softmax = DenseLayer(
        l_reshape, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')

    return l_out
def create_model(substreams, mask_shape, mask_var, lstm_size=250, output_classes=26,
                 fusiontype='concat', w_init_fn=las.init.Orthogonal(), use_peepholes=True):
    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_seqlen_raw = l_mask.input_var.shape[1]

    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    if fusiontype == 'adasum':
        l_fuse = AdaptiveElemwiseSumLayer(substreams, name='adasum1')
    elif fusiontype == 'sum':
        l_fuse = ElemwiseSumLayer(substreams, name='sum1')
    elif fusiontype == 'concat':
        l_fuse = ConcatLayer(substreams, axis=-1, name='concat')

    f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm_agg')
    l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2')

    # reshape to (num_examples * seq_len, lstm_size)
    l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we use a dense layer with one unit per output class.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen_raw, output_classes), name='output')

    return l_out, l_fuse
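# --- Usage sketch (added; shapes, names and helper-free substreams are assumptions) ---
# Fuse two per-modality feature streams and build the sequence classifier on top.
# fusiontype may be 'concat', 'sum', or 'adasum' as handled above.
import theano.tensor as T
from lasagne.layers import InputLayer

mask_var = T.matrix('mask')                                 # (batch, seq_len)
stream1 = InputLayer((None, None, 250), T.tensor3('s1'))    # e.g. first modality features
stream2 = InputLayer((None, None, 250), T.tensor3('s2'))    # e.g. second modality features
l_out, l_fuse = create_model([stream1, stream2], (None, None), mask_var,
                             lstm_size=250, output_classes=26, fusiontype='concat')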
def create_model(input_shape, input_var, mask_shape, mask_var, lstm_size=250, output_classes=26,
                 w_init=las.init.GlorotUniform(), use_peepholes=False, use_blstm=True):
    gate_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_seqlen = l_in.input_var.shape[1]

    if use_blstm:
        f_lstm, b_lstm = create_blstm(l_in, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_sum = ElemwiseSumLayer([f_lstm, b_lstm], name='sum')
        # reshape to (num_examples * seq_len, lstm_size)
        l_reshape = ReshapeLayer(l_sum, (-1, lstm_size), name='reshape')
    else:
        l_lstm = create_lstm(l_in, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_reshape = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we use a dense layer with one unit per output class.
    l_softmax = DenseLayer(
        l_reshape, num_units=output_classes, nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')

    return l_out
def __init__(self, W_t=init.Normal(0.1), W_x=init.Normal(0.1),
             b=init.Constant(0.),
             nonlinearity_inside=nonlinearities.tanh,
             nonlinearity_outside=nonlinearities.sigmoid):
    self.W_t = W_t
    self.W_x = W_x
    self.b = b
    self.nonlinearity_inside = nonlinearity_inside
    self.nonlinearity_outside = nonlinearity_outside
def __init__(
        self,
        env_spec,
        latent_dim=0,  # all this is fake
        latent_name='categorical',
        bilinear_integration=False,
        resample=False,  # until here
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        prob_network=None,
):
    """
    :param env_spec: A spec for the mdp.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other network params
        are ignored
    :return:
    """
    # placeholder latent-variable bookkeeping (not actually used by this policy)
    self.latent_dim = latent_dim  # could I avoid needing this self for the get_action?
    self.latent_name = latent_name
    self.bilinear_integration = bilinear_integration
    self.resample = resample
    self._set_std_to_0 = False

    Serializable.quick_init(self, locals())
    assert isinstance(env_spec.action_space, Discrete)

    if prob_network is None:
        prob_network = MLP(
            input_shape=(env_spec.observation_space.flat_dim,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.softmax,
        )

    self._l_prob = prob_network.output_layer
    self._l_obs = prob_network.input_layer
    self._f_prob = ext.compile_function([prob_network.input_layer.input_var], L.get_output(
        prob_network.output_layer))

    self._dist = Categorical(env_spec.action_space.n)

    super(CategoricalMLPPolicy, self).__init__(env_spec)
    LasagnePowered.__init__(self, [prob_network.output_layer])