def group_layer(self, group_num, filters, name, kernel_regularizer_l2):
def f(input):
if group_num == 1:
tower = Conv2D(filters, (1, 1), name=name + '_conv2d_0_1', padding='same',
kernel_initializer=IdentityConv())(input)
tower = Conv2D(filters, (3, 3), name=name + '_conv2d_0_2', padding='same',
kernel_initializer=IdentityConv(),
kernel_regularizer=regularizers.l2(kernel_regularizer_l2))(tower)
tower = PReLU()(tower)
return tower
else:
group_output = []
for i in range(group_num):
                filter_num = filters // group_num
                # e.g. filters = 201, group_num = 4: the last group gets 201 - 3 * 50 = 51 filters
                if i == group_num - 1:  # last group takes the remainder
                    filter_num = filters - i * (filters // group_num)
tower = Conv2D(filter_num, (1, 1), name=name + '_conv2d_' + str(i) + '_1', padding='same',
kernel_initializer=GroupIdentityConv(i, group_num))(input)
tower = Conv2D(filter_num, (3, 3), name=name + '_conv2d_' + str(i) + '_2', padding='same',
kernel_initializer=IdentityConv(),
kernel_regularizer=regularizers.l2(kernel_regularizer_l2))(tower)
tower = PReLU()(tower)
group_output.append(tower)
if K.image_data_format() == 'channels_first':
axis = 1
elif K.image_data_format() == 'channels_last':
axis = 3
output = Concatenate(axis=axis)(group_output)
return output
return f
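A minimal, self-contained sketch of the same grouped-convolution pattern, with the custom IdentityConv/GroupIdentityConv initializers omitted and hypothetical input shape and regularization strength, might look like this:

from keras.layers import Input, Conv2D, PReLU, Concatenate
from keras.models import Model
from keras import regularizers

inp = Input(shape=(32, 32, 64))          # hypothetical channels-last feature map
group_num, filters = 4, 201
towers = []
for i in range(group_num):
    # each group gets filters // group_num channels; the last group absorbs the remainder
    n = filters // group_num if i < group_num - 1 else filters - (group_num - 1) * (filters // group_num)
    t = Conv2D(n, (1, 1), padding='same')(inp)
    t = Conv2D(n, (3, 3), padding='same', kernel_regularizer=regularizers.l2(1e-4))(t)
    towers.append(PReLU()(t))
out = Concatenate(axis=-1)(towers)       # 50 + 50 + 50 + 51 = 201 channels
model = Model(inp, out)

This mirrors the structure above: every group sees the full input, and the concatenated map has exactly `filters` output channels.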
def mixture_of_gaussian_output(x, n_components):
mu = keras.layers.Dense(n_components, activation='linear')(x)
log_sig = keras.layers.Dense(n_components, activation='linear')(x)
pi = keras.layers.Dense(n_components, activation='softmax')(x)
return Concatenate(axis=1)([pi, mu, log_sig])
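The concatenated [pi, mu, log_sig] vector is normally consumed by a mixture-density loss. The following is only a hedged sketch of such a negative log-likelihood, assuming a scalar target per sample and the ordering above; mog_nll is a hypothetical helper name:

import math
from keras import backend as K

def mog_nll(n_components):
    def loss(y_true, y_pred):
        # split the concatenated output back into its three parts
        pi = y_pred[:, :n_components]
        mu = y_pred[:, n_components:2 * n_components]
        log_sig = y_pred[:, 2 * n_components:]
        # per-component Gaussian log-density of the (broadcast) scalar target
        z = (y_true - mu) / K.exp(log_sig)
        log_comp = -0.5 * K.square(z) - log_sig - 0.5 * math.log(2.0 * math.pi)
        # -log sum_k pi_k * N(y | mu_k, sig_k), computed stably via log-sum-exp
        return -K.mean(K.logsumexp(K.log(pi + K.epsilon()) + log_comp, axis=1))
    return loss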
def two_blocks_dcnn(self):
"""
Method to model and compile the first CNN and the whole two blocks DCNN.
Also initialize the field cnn1
:return: Model, Two blocks DeepCNN compiled
"""
# input layers
input65 = Input(shape=(65, 65, 4))
input33 = Input(shape=(33, 33, 4))
# first CNN modeling
output_cnn1 = self.one_block_model(input65)
# first cnn compiling
cnn1 = Model(inputs=input65, outputs=output_cnn1)
sgd = SGD(lr=self.learning_rate, momentum=self.momentum_rate, decay=self.decay_rate, nesterov=False)
cnn1.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
# initialize the field cnn1
self.cnn1 = cnn1
    print('first CNN compiled!')
# concatenation of the output of the first CNN and the input of shape 33x33
conc_input = Concatenate(axis=-1)([input33, output_cnn1])
# second cnn modeling
output_dcnn = self.one_block_model(conc_input)
# whole dcnn compiling
dcnn = Model(inputs=[input65, input33], outputs=output_dcnn)
sgd = SGD(lr=self.learning_rate, momentum=self.momentum_rate, decay=self.decay_rate, nesterov=False)
dcnn.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    print('DCNN compiled!')
return dcnn
def one_block_model(self, input_tensor):
"""
Method to model one cnn. It doesn't compile the model.
:param input_tensor: tensor, to feed the two path
:return: output: tensor, the output of the cnn
"""
# localPath
loc_path = Conv2D(64, (7, 7), data_format='channels_first', padding='valid', activation='relu', use_bias=True,
kernel_regularizer=regularizers.l1_l2(self.l1_rate, self.l2_rate),
kernel_constraint=max_norm(2.),
bias_constraint=max_norm(2.), kernel_initializer='lecun_uniform', bias_initializer='zeros')(input_tensor)
loc_path = MaxPooling2D(pool_size=(4, 4), data_format='channels_first', strides=1, padding='valid')(loc_path)
loc_path = Dropout(self.dropout_rate)(loc_path)
loc_path = Conv2D(64, (3, 3), data_format='channels_first', padding='valid', activation='relu', use_bias=True,
kernel_initializer='lecun_uniform', bias_initializer='zeros',
kernel_regularizer=regularizers.l1_l2(self.l1_rate, self.l2_rate),kernel_constraint=max_norm(2.),
bias_constraint=max_norm(2.))(loc_path)
loc_path = MaxPooling2D(pool_size=(2, 2), data_format='channels_first', strides=1, padding='valid')(loc_path)
loc_path = Dropout(self.dropout_rate)(loc_path)
# globalPath
glob_path = Conv2D(160, (13, 13), data_format='channels_first', strides=1, padding='valid', activation='relu', use_bias=True,
kernel_initializer='lecun_uniform', bias_initializer='zeros',
kernel_regularizer=regularizers.l1_l2(self.l1_rate, self.l2_rate),
kernel_constraint=max_norm(2.),
bias_constraint=max_norm(2.))(input_tensor)
glob_path = Dropout(self.dropout_rate)(glob_path)
    # concatenation of the two paths
path = Concatenate(axis=1)([loc_path, glob_path])
# output layer
output = Conv2D(5, (21, 21), data_format='channels_first', strides=1, padding='valid', activation='softmax', use_bias=True,
kernel_initializer='lecun_uniform', bias_initializer='zeros')(path)
return output
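For a 65x65 input, the spatial sizes of this block work out as follows (all convolutions are 'valid', both pools use stride 1): local path 65 -> 59 (7x7 conv) -> 56 (4x4 pool) -> 54 (3x3 conv) -> 53 (2x2 pool); global path 65 -> 53 (13x13 conv); after concatenation, the 21x21 output convolution gives 53 - 21 + 1 = 33. That 33x33, 5-channel map is what the cascade variants concatenate with the 33x33 input patch before the second block.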
def build(self):
model = self.net.model
pi_model = self.net.pi_model
q_model = self.net.q_model
target_model = self.net.target_model
target_pi_model = self.net.target_pi_model
target_q_model = self.net.target_q_model
self.states = tf.placeholder(tf.float32, shape=(None, self.in_dim), name='states')
self.actions = tf.placeholder(tf.float32, shape=[None, self.action_dim], name='actions')
self.rewards = tf.placeholder(tf.float32, shape=[None], name='rewards')
self.next_states = tf.placeholder(tf.float32, shape=[None, self.in_dim], name='next_states')
    # terminals contain only 0 or 1, so they can act as a mask
#self.terminals = tf.placeholder(tf.bool, shape=[None], name='terminals')
self.ys = tf.placeholder(tf.float32, shape=[None])
#y = tf.where(self.terminals, self.rewards, self.rewards + self.gamma * K.stop_gradient(K.sum(target_q_model(Concatenate()([target_model(self.next_states),
# target_pi_model(self.next_states)])), axis=-1)))
self.target_q = K.sum(target_q_model(Concatenate()([target_model(self.states), target_pi_model(self.states)])), axis=-1)
self.q = K.sum(q_model(Concatenate()([model(self.states), self.actions])), axis=-1)
self.q_loss = K.mean(K.square(self.ys-self.q))
self.mu = pi_model(self.states)
self.pi_loss = - K.mean(q_model(Concatenate()([model(self.states), self.mu])))
self.q_updater = self.q_optimizer.minimize(self.q_loss, var_list=self.net.var_q)
self.pi_updater = self.pi_opimizer.minimize(self.pi_loss, var_list=self.net.var_pi)
self.soft_updater = [K.update(t_p, t_p*(1-self.tau)+p*self.tau) for p, t_p in zip(self.net.var_all, self.net.var_target_all)]
self.sync = [K.update(t_p, p) for p, t_p in zip(self.net.var_all, self.net.var_target_all)]
self.sess.run(tf.global_variables_initializer())
self.built = True
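A rough sketch of one training step on top of the graph built above. The object name agent and the numpy batch arrays are hypothetical, and the sketch assumes the underlying Keras models need no learning-phase flag; note that target_q is defined over the states placeholder, so the next states are fed into it:

target_q_val = agent.sess.run(agent.target_q, feed_dict={agent.states: next_state_batch})
# targets y = r + gamma * Q'(s', pi'(s')), masked with the 0/1 terminal flags
ys = reward_batch + agent.gamma * (1.0 - terminal_batch) * target_q_val
agent.sess.run(agent.q_updater, feed_dict={agent.states: state_batch,
                                           agent.actions: action_batch,
                                           agent.ys: ys})
agent.sess.run(agent.pi_updater, feed_dict={agent.states: state_batch})
agent.sess.run(agent.soft_updater)  # Polyak-average the online weights into the targets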
def build(self):
model = self.net.model
mu_model = self.net.mu_model
log_std_model = self.net.log_std_model
q_model = self.net.q_model
target_model = self.net.target_model
target_mu_model = self.net.target_mu_model
target_log_std_model = self.net.target_log_std_model
target_q_model = self.net.target_q_model
self.states = tf.placeholder(tf.float32, shape=(None, self.in_dim), name='states')
self.actions = tf.placeholder(tf.float32, shape=[None, self.action_dim], name='actions')
self.rewards = tf.placeholder(tf.float32, shape=[None], name='rewards')
self.next_states = tf.placeholder(tf.float32, shape=[None, self.in_dim], name='next_states')
self.ys = tf.placeholder(tf.float32, shape=[None])
    # There are other possible implementations of how to take the action:
    # taking the next action, using only mu, or searching for the action that maximizes Q.
target_mu = target_mu_model(self.states)
target_log_std = target_log_std_model(self.states)
target_action = target_mu + K.random_normal(K.shape(target_mu), dtype=tf.float32) * K.exp(target_log_std)
self.target_q = K.sum(target_q_model(Concatenate()([target_model(self.states), target_action])), axis=-1)
self.q = K.sum(q_model(Concatenate()([model(self.states), self.actions])), axis=-1)
self.q_loss = K.mean(K.square(self.ys-self.q))
self.mu = mu_model(self.states)
self.log_std = log_std_model(self.states)
self.eta = (self.actions - self.mu) / K.exp(self.log_std)
inferred_action = self.mu + K.stop_gradient(self.eta) * K.exp(self.log_std)
self.pi_loss = - K.mean(q_model(Concatenate()([model(self.states), inferred_action])))
self.q_updater = self.q_optimizer.minimize(self.q_loss, var_list=self.net.var_q)
self.pi_updater = self.pi_opimizer.minimize(self.pi_loss, var_list=self.net.var_pi)
self.soft_updater = [K.update(t_p, t_p*(1-self.tau)+p*self.tau) for p, t_p in zip(self.net.var_all, self.net.var_target_all)]
self.sync = [K.update(t_p, p) for p, t_p in zip(self.net.var_all, self.net.var_target_all)]
self.sess.run(tf.global_variables_initializer())
self.built = True
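Note the reparameterization in the policy loss above: inferred_action = mu + stop_gradient((actions - mu) / exp(log_std)) * exp(log_std) is numerically identical to the sampled actions, but because eta is wrapped in stop_gradient, the Q-value's gradient flows into mu and log_std instead of into the stored actions.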
def _build(self,input_shape):
dim = np.prod(input_shape) // 2
print("{} latent bits".format(dim))
M, N = self.parameters['M'], self.parameters['N']
x = Input(shape=input_shape)
_pre = tf.slice(x, [0,0], [-1,dim])
_suc = tf.slice(x, [0,dim], [-1,dim])
pre = wrap(x,_pre,name="pre")
suc = wrap(x,_suc,name="suc")
print("encoder")
_encoder = self.build_encoder([dim])
action_logit = ConditionalSequential(_encoder, pre, axis=1)(suc)
gs = self.build_gs()
action = gs(action_logit)
print("decoder")
_decoder = self.build_decoder([dim])
suc_reconstruction = ConditionalSequential(_decoder, pre, axis=1)(flatten(action))
y = Concatenate(axis=1)([pre,suc_reconstruction])
action2 = Input(shape=(N,M))
pre2 = Input(shape=(dim,))
suc_reconstruction2 = ConditionalSequential(_decoder, pre2, axis=1)(flatten(action2))
y2 = Concatenate(axis=1)([pre2,suc_reconstruction2])
def rec(x, y):
return bce(K.reshape(x,(K.shape(x)[0],dim*2,)),
K.reshape(y,(K.shape(x)[0],dim*2,)))
def loss(x, y):
kl_loss = gs.loss()
reconstruction_loss = rec(x, y)
return reconstruction_loss + kl_loss
self.metrics.append(rec)
self.callbacks.append(LambdaCallback(on_epoch_end=gs.cool))
self.custom_log_functions['tau'] = lambda: K.get_value(gs.tau)
self.loss = loss
self.encoder = Model(x, [pre,action])
self.decoder = Model([pre2,action2], y2)
self.net = Model(x, y)
self.autoencoder = self.net
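In short, this sketch learns a discrete action between consecutive states: the encoder maps (pre, suc) to a Gumbel-Softmax action of shape (N, M), the decoder maps (pre, action) to the concatenation of pre and the reconstructed suc, and the full net is trained with the binary cross-entropy reconstruction term plus the Gumbel-Softmax KL term returned by gs.loss().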
def __init__(self, config, embeddings=None, ntags=None):
# build word embedding
word_ids = Input(batch_shape=(None, None), dtype='int32')
if embeddings is None:
word_embeddings = Embedding(input_dim=config.vocab_size,
output_dim=config.word_embedding_size,
mask_zero=True)(word_ids)
else:
word_embeddings = Embedding(input_dim=embeddings.shape[0],
output_dim=embeddings.shape[1],
mask_zero=True,
weights=[embeddings])(word_ids)
# build character based word embedding
char_ids = Input(batch_shape=(None, None, None), dtype='int32')
char_embeddings = Embedding(input_dim=config.char_vocab_size,
output_dim=config.char_embedding_size,
mask_zero=True
)(char_ids)
s = K.shape(char_embeddings)
char_embeddings = Lambda(lambda x: K.reshape(x, shape=(-1, s[-2], config.char_embedding_size)))(char_embeddings)
fwd_state = LSTM(config.num_char_lstm_units, return_state=True)(char_embeddings)[-2]
bwd_state = LSTM(config.num_char_lstm_units, return_state=True, go_backwards=True)(char_embeddings)[-2]
char_embeddings = Concatenate(axis=-1)([fwd_state, bwd_state])
# shape = (batch size, max sentence length, char hidden size)
char_embeddings = Lambda(lambda x: K.reshape(x, shape=[-1, s[1], 2 * config.num_char_lstm_units]))(char_embeddings)
# combine characters and word
x = Concatenate(axis=-1)([word_embeddings, char_embeddings])
x = Dropout(config.dropout)(x)
x = Bidirectional(LSTM(units=config.num_word_lstm_units, return_sequences=True))(x)
x = Dropout(config.dropout)(x)
x = Dense(config.num_word_lstm_units, activation='tanh')(x)
x = Dense(ntags)(x)
self.crf = ChainCRF()
pred = self.crf(x)
sequence_lengths = Input(batch_shape=(None, 1), dtype='int32')
self.model = Model(inputs=[word_ids, char_ids, sequence_lengths], outputs=[pred])
self.config = config
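For reference, the shape bookkeeping around the character-level LSTMs works out as follows, using the hypothetical names B = batch size, S = max sentence length, W = max word length:

# char_ids:                    (B, S, W)
# char Embedding:              (B, S, W, char_embedding_size)
# first Lambda reshape:        (B*S, W, char_embedding_size)   -> one sequence per word
# fwd_state / bwd_state:       (B*S, num_char_lstm_units)      -> final LSTM hidden states
# Concatenate:                 (B*S, 2 * num_char_lstm_units)
# second Lambda reshape:       (B, S, 2 * num_char_lstm_units) -> back on the sentence axis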
def one_block_model(self, input_tensor):
"""
Model for the twoPathways CNN.
It doesn't compile the model.
The consist of two streams, namely:
local_path anc global_path joined
in a final stream named path
local_path is articulated through:
1st convolution 64x7x7 + relu
1st maxpooling 4x4
1st Dropout with rate: 0.5
2nd convolution 64x3x3 + relu
2nd maxpooling 2x2
2nd droput with rate: 0.5
global_path is articulated through:
convolution 160x13x13 + relu
dropout with rate: 0.5
path is articulated through:
convolution 5x21x21
:param input_tensor: tensor, to feed the two path
:return: output: tensor, the output of the cnn
"""
# localPath
loc_path = Conv2D(64, (7, 7), padding='valid', activation='relu', use_bias=True,
kernel_regularizer=regularizers.l1_l2(self.l1_rate, self.l2_rate),
kernel_constraint=max_norm(2.),
bias_constraint=max_norm(2.))(input_tensor)
loc_path = MaxPooling2D(pool_size=(4, 4), strides=1, padding='valid')(loc_path)
loc_path = Dropout(self.dropout_rate)(loc_path)
loc_path = Conv2D(64, (3, 3), padding='valid', activation='relu', use_bias=True,
kernel_regularizer=regularizers.l1_l2(self.l1_rate, self.l2_rate),
kernel_constraint=max_norm(2.),
bias_constraint=max_norm(2.))(loc_path)
loc_path = MaxPooling2D(pool_size=(2, 2), strides=1, padding='valid')(loc_path)
loc_path = Dropout(self.dropout_rate)(loc_path)
# globalPath
glob_path = Conv2D(160, (13, 13), strides=1, padding='valid', activation='relu', use_bias=True,
kernel_regularizer=regularizers.l1_l2(self.l1_rate, self.l2_rate),
kernel_constraint=max_norm(2.),
bias_constraint=max_norm(2.))(input_tensor)
glob_path = Dropout(self.dropout_rate)(glob_path)
    # concatenation of the two paths
path = Concatenate(axis=-1)([loc_path, glob_path])
# output layer
output = Conv2D(5, (21, 21), strides=1, padding='valid', activation='softmax', use_bias=True)(path)
return output
def compile_model(self):
"""
Model and compile the first CNN and the whole two blocks DCNN.
Also initialize the field cnn1
:return: Model, Two blocks DeepCNN compiled
"""
if self.cascade_model:
# input layers
input65 = Input(shape=(4, 65, 65))
input33 = Input(shape=(4, 33, 33))
# first CNN modeling
output_cnn1 = self.one_block_model(input65)
# first cnn compiling
cnn1 = Model(inputs=input65, outputs=output_cnn1)
sgd = SGD(lr=self.learning_rate, momentum=self.momentum_rate, decay=self.decay_rate, nesterov=False)
cnn1.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
# initialize the field cnn1
self.cnn1 = cnn1
        print('First CNN compiled!')
# concatenation of the output of the first CNN and the input of shape 33x33
conc_input = Concatenate(axis=1)([input33, output_cnn1])
# second cnn modeling
output_dcnn = self.one_block_model(conc_input)
output_dcnn = Reshape((5,))(output_dcnn)
# whole dcnn compiling
dcnn = Model(inputs=[input65, input33], outputs=output_dcnn)
sgd = SGD(lr=self.learning_rate, momentum=self.momentum_rate, decay=self.decay_rate, nesterov=False)
dcnn.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
        print('Cascade DCNN compiled!')
return dcnn
else:
# input layers
input33 = Input(shape=(4, 33, 33))
# first CNN modeling
output_cnn1 = self.one_block_model(input33)
# first cnn compiling
cnn1 = Model(inputs=input33, outputs=output_cnn1)
sgd = SGD(lr=self.learning_rate, momentum=self.momentum_rate, decay=self.decay_rate, nesterov=False)
cnn1.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
# initialize the field cnn1
self.cnn1 = cnn1
        print('Two pathway CNN compiled!')
return cnn1
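A hedged usage sketch for the cascade branch; the instance name brain_seg, the array names, and the training hyperparameters are hypothetical, and the patch arrays are assumed to follow the (4, 65, 65) / (4, 33, 33) input layout with one-hot labels over the 5 classes:

dcnn = brain_seg.compile_model()                 # with cascade_model = True
# X65: (n, 4, 65, 65), X33: (n, 4, 33, 33), Y: (n, 5)
dcnn.fit([X65, X33], Y, batch_size=128, epochs=20, validation_split=0.1)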