def CNN(n_epochs):
net1 = NeuralNet(
layers=[
('input', layers.InputLayer),
('conv1', layers.Conv2DLayer), # Convolutional layer. Params defined below
('pool1', layers.MaxPool2DLayer), # Max-pooling: downsamples the feature maps, mainly for speed
('conv2', layers.Conv2DLayer),
('hidden3', layers.DenseLayer),
('output', layers.DenseLayer),
],
input_shape=(None, 1, 6, 5),
conv1_num_filters=8,
conv1_filter_size=(3, 3),
conv1_nonlinearity=lasagne.nonlinearities.rectify,
pool1_pool_size=(2, 2),
conv2_num_filters=12,
conv2_filter_size=(1, 1),
conv2_nonlinearity=lasagne.nonlinearities.rectify,
hidden3_num_units=1000,
output_num_units=2,
output_nonlinearity=lasagne.nonlinearities.softmax,
update_learning_rate=0.0001,
update_momentum=0.9,
max_epochs=n_epochs,
verbose=0,
)
return net1
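A minimal usage sketch for the CNN() factory above, assuming the snippet's own imports (import lasagne, from lasagne import layers, from nolearn.lasagne import NeuralNet) are in scope. The data arrays below are placeholders shaped to match input_shape=(None, 1, 6, 5); they are not part of the original project.

import numpy as np

# Hypothetical data: 100 samples of shape (1, 6, 5) and binary labels.
X = np.random.rand(100, 1, 6, 5).astype(np.float32)
y = np.random.randint(0, 2, size=100).astype(np.int32)

net = CNN(n_epochs=50)        # build the nolearn NeuralNet defined above
net.fit(X, y)                 # train for 50 epochs with the settings given above
proba = net.predict_proba(X)  # (100, 2) softmax class probabilities
labels = net.predict(X)       # hard class predictions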
Python DenseLayer() examples (source code)
File: convolutional_neural_network.py, Project: kaggle-breast-cancer-prediction, Author: sirCamp
def _set_inverse_parameters(self, patterns=None):
for l in L.get_all_layers(self.output_layer):
if type(l) is L.Conv2DLayer:
W = l.W.get_value()
if l.flip_filters:
W = W[:,:,::-1,::-1]
W = W.transpose(1,0,2,3)
self.inverse_map[l].W.set_value(W)
elif type(l) is L.DenseLayer:
self.inverse_map[l].W.set_value(l.W.get_value().T)
def _get_normalised_relevance_layer(self, layer, feeder):
def add_epsilon(Zs):
tmp = (T.cast(Zs >= 0, theano.config.floatX)*2.0 - 1.0)
return Zs + self.epsilon * tmp
if isinstance(layer, L.DenseLayer):
forward_layer = L.DenseLayer(layer.input_layer,
layer.num_units,
W=layer.W,
b=layer.b,
nonlinearity=None)
elif isinstance(layer, L.Conv2DLayer):
forward_layer = L.Conv2DLayer(layer.input_layer,
num_filters=layer.num_filters,
W=layer.W,
b=layer.b,
stride=layer.stride,
filter_size=layer.filter_size,
flip_filters=layer.flip_filters,
untie_biases=layer.untie_biases,
pad=layer.pad,
nonlinearity=None)
else:
raise NotImplementedError()
forward_layer = L.ExpressionLayer(forward_layer,
lambda x: 1.0 / add_epsilon(x))
feeder = L.ElemwiseMergeLayer([forward_layer, feeder],
merge_function=T.mul)
return feeder
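A small standalone numpy sketch (my own illustration, not part of the original code) of what add_epsilon above does: each pre-activation is pushed away from zero by epsilon in the direction of its sign, so the 1.0 / add_epsilon(x) expression used in the ExpressionLayer stays finite even where the forward activations are exactly zero.

import numpy as np

def add_epsilon_np(Zs, epsilon=1e-4):
    # +1 where Zs >= 0, -1 where Zs < 0 (same sign convention as the Theano code)
    tmp = (Zs >= 0).astype(np.float32) * 2.0 - 1.0
    return Zs + epsilon * tmp

Zs = np.array([0.0, 1e-7, -1e-7, 2.0], dtype=np.float32)
print(1.0 / add_epsilon_np(Zs))  # finite everywhere, including at exactly zero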
def _invert_DenseLayer(self,layer,feeder):
# Warning: they are swapped here
feeder = self._put_rectifiers(feeder, layer)
feeder = self._get_normalised_relevance_layer(layer, feeder)
output_units = np.prod(L.get_output_shape(layer.input_layer)[1:])
output_layer = L.DenseLayer(feeder, num_units=output_units)
W = output_layer.W
tmp_shape = np.asarray((-1,)+L.get_output_shape(output_layer)[1:])
x_layer = L.ReshapeLayer(layer.input_layer, tmp_shape.tolist())
output_layer = L.ElemwiseMergeLayer(incomings=[x_layer, output_layer],
merge_function=T.mul)
output_layer.W = W
return output_layer
def _invert_DenseLayer(self, layer, feeder):
# Warning: they are swapped here
feeder = self._put_rectifiers(feeder, layer)
output_units = np.prod(L.get_output_shape(layer.input_layer)[1:])
output_layer = L.DenseLayer(feeder,
num_units=output_units,
nonlinearity=None, b=None)
return output_layer
def _invert_layer(self, layer, feeder):
layer_type = type(layer)
if L.get_output_shape(feeder) != L.get_output_shape(layer):
feeder = L.ReshapeLayer(feeder, (-1,)+L.get_output_shape(layer)[1:])
if layer_type is L.InputLayer:
return self._invert_InputLayer(layer, feeder)
elif layer_type is L.FlattenLayer:
return self._invert_FlattenLayer(layer, feeder)
elif layer_type is L.DenseLayer:
return self._invert_DenseLayer(layer, feeder)
elif layer_type is L.Conv2DLayer:
return self._invert_Conv2DLayer(layer, feeder)
elif layer_type is L.DropoutLayer:
return self._invert_DropoutLayer(layer, feeder)
elif layer_type in [L.MaxPool2DLayer, L.MaxPool1DLayer]:
return self._invert_MaxPoolingLayer(layer, feeder)
elif layer_type is L.PadLayer:
return self._invert_PadLayer(layer, feeder)
elif layer_type is L.SliceLayer:
return self._invert_SliceLayer(layer, feeder)
elif layer_type is L.LocalResponseNormalization2DLayer:
return self._invert_LocalResponseNormalisation2DLayer(layer, feeder)
elif layer_type is L.GlobalPoolLayer:
return self._invert_GlobalPoolLayer(layer, feeder)
else:
return self._invert_UnknownLayer(layer, feeder)
def _collect_layers(self):
self.all_layers = L.get_all_layers(self.output_layer)
ret = [l for l in self.all_layers if
type(l) in [L.DenseLayer, L.Conv2DLayer]]
return ret
def _get_split(self, layer,
deterministic=True, conv_all_patches=True, **kwargs):
# Get the patches and the outputs without the non-linearities.
if type(layer) is L.DenseLayer:
x, y = putils.get_dense_xy(layer, deterministic)
elif type(layer) is L.Conv2DLayer:
if conv_all_patches is True:
x, y = putils.get_conv_xy_all(layer, deterministic)
else:
x, y = putils.get_conv_xy(layer, deterministic)
else:
raise ValueError("Unknown layer as input")
# Create an output dictionary
outputs = dict()
for name, fun in subtypes:
outputs[name] = dict()
mrk_y = 1.0* T.cast(fun(y), dtype=theano.config.floatX) # (N,O)
y_current = y*mrk_y # This has a binary mask
cnt_y = T.shape_padaxis(T.sum(mrk_y, axis=0), axis=0) # (1,O)
norm = T.maximum(cnt_y, 1.)
# Count how many datapoints are considered
outputs[name]['cnt'] = cnt_y
# The mean of the current batch
outputs[name]['m_y'] = T.shape_padaxis(y_current.sum(axis=0), axis=0) / norm # (1,O) mean output for batch
outputs[name]['m_x'] = T.dot(x.T, mrk_y) / norm # (D,O) mean input for batch
# Second-order (uncentred) statistics of the current batch
outputs[name]['yty'] = T.shape_padaxis(T.sum(y_current ** 2., axis=0), axis=0) / norm # (1,O)
outputs[name]['xty'] = T.dot(x.T, y_current) / norm # D,O
return dict_to_list(outputs)
def get_split(self, layer,
deterministic=True, conv_all_patches=True, **kwargs):
# Get the patches and the outputs without the non-linearities.
if type(layer) is L.DenseLayer:
x, y = get_dense_xy(layer, deterministic)
elif type(layer) is L.Conv2DLayer:
if conv_all_patches is True:
x, y = get_conv_xy_all(layer, deterministic)
else:
x, y = get_conv_xy(layer, deterministic)
else:
raise ValueError("Unknown layer as input")
# Create an output dictionary
outputs = dict()
for name, fun in subtypes:
outputs[name] = dict()
mrk_y = 1.0* T.cast(fun(y), dtype=theano.config.floatX) # (N,O)
y_current = y*mrk_y # This has a binary mask
cnt_y = T.shape_padaxis(T.sum(mrk_y, axis=0), axis=0) # (1,O)
norm = T.maximum(cnt_y, 1.)
# Count how many datapoints are considered
outputs[name]['cnt'] = cnt_y
# The mean of the current batch
outputs[name]['m_y'] = T.shape_padaxis(y_current.sum(axis=0), axis=0) / norm # (1,O) mean output for batch
outputs[name]['m_x'] = T.dot(x.T, mrk_y) / norm # (D,O) mean input for batch
# Second-order (uncentred) statistics of the current batch
outputs[name]['yty'] = T.shape_padaxis(T.sum(y_current ** 2., axis=0), axis=0) / norm # (1,O)
outputs[name]['xty'] = T.dot(x.T, y_current) / norm # D,O
return dict_to_list(outputs)
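Both split helpers iterate over a module-level subtypes sequence that is not part of this excerpt. Its real definition is not shown here; from the way fun(y) is cast to a float mask, each entry must pair a name with an elementwise predicate on the pre-activations. A purely illustrative stand-in could look like this:

import theano.tensor as T

# Illustrative only -- the actual definition lives elsewhere in the project.
subtypes = [
    ('all', lambda y: T.ones_like(y)),  # keep every datapoint
    ('positive', lambda y: y > 0.),     # datapoints where the unit fired
]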
def exe_maxru(length, num_units, position, binominal):
batch_size = BATCH_SIZE
input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
target_var = T.ivector(name='targets')
layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
time_updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
time_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)
resetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
W_cell=lasagne.init.GlorotUniform())
updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
W_cell=lasagne.init.GlorotUniform())
hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)
layer_taru = MAXRULayer(layer_input, num_units, max_length=length,
P_time=lasagne.init.GlorotUniform(), nonlinearity=nonlinearities.tanh,
resetgate=resetgate, updategate=updategate, hidden_update=hiden_update,
time_updategate=time_updategate, time_update=time_update,
only_return_final=True, name='MAXRU', p=0.)
# W = layer_taru.W_hid_to_hidden_update.sum()
# U = layer_taru.W_in_to_hidden_update.sum()
# b = layer_taru.b_hidden_update.sum()
layer_output = DenseLayer(layer_taru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')
return train(layer_output, input_var, target_var, batch_size, length, position, binominal)
def exe_lstm(use_embedd, length, num_units, position, binominal):
batch_size = BATCH_SIZE
input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
target_var = T.ivector(name='targets')
layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
if use_embedd:
layer_position = construct_position_input(batch_size, length, num_units)
layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)
ingate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
W_cell=lasagne.init.Uniform(range=0.1))
outgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
W_cell=lasagne.init.Uniform(range=0.1))
# According to Jozefowicz et al. (2015), initialize the forget-gate bias to 1.
forgetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
# Use tanh as the cell nonlinearity for now; a purely linear cell still needs to be tried.
cell = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)
layer_lstm = LSTMLayer(layer_input, num_units, ingate=ingate, forgetgate=forgetgate, cell=cell, outgate=outgate,
peepholes=False, nonlinearity=nonlinearities.tanh, only_return_final=True, name='LSTM')
# W = layer_lstm.W_hid_to_cell.sum()
# U = layer_lstm.W_in_to_cell.sum()
# b = layer_lstm.b_cell.sum()
layer_output = DenseLayer(layer_lstm, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')
return train(layer_output, layer_lstm, input_var, target_var, batch_size, length, position, binominal)
def exe_gru(use_embedd, length, num_units, position, binominal, reset_input):
batch_size = BATCH_SIZE
input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
target_var = T.ivector(name='targets')
layer_input = lasagne.layers.InputLayer(shape=(batch_size, length, 1), input_var=input_var, name='input')
if use_embedd:
layer_position = construct_position_input(batch_size, length, num_units)
layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)
resetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)
layer_gru = GRULayer_ANA(layer_input, num_units, resetgate=resetgate, updategate=updategate, hidden_update=hiden_update,
reset_input=reset_input, only_return_final=True, name='GRU')
# W = layer_gru.W_hid_to_hidden_update.sum()
# U = layer_gru.W_in_to_hidden_update.sum()
# b = layer_gru.b_hidden_update.sum()
layer_output = DenseLayer(layer_gru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')
return train(layer_output, layer_gru, input_var, target_var, batch_size, length, position, binominal)
def dense_layer(input, n_units, name, network_weights, nonlinearity=None, bn=False):
layer = DenseLayer(input, num_units=n_units, nonlinearity=nonlinearity, name=name,
W=get_W(network_weights, name), b=get_b(network_weights, name))
if bn:
layer = batch_norm(layer)
return layer
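A hedged usage sketch for the dense_layer helper above. The get_W and get_b helpers belong to the original project and are not shown in this excerpt, so the versions below are hypothetical stand-ins that look a (W, b) pair up by layer name and fall back to default initializers; only the call to dense_layer itself reflects the snippet above.

import lasagne
from lasagne.layers import InputLayer, DenseLayer, batch_norm  # names the snippet above relies on

def get_W(network_weights, name):
    # Hypothetical stand-in: return a stored weight matrix for this layer name,
    # or a default initializer when nothing is stored.
    if network_weights and name in network_weights:
        return network_weights[name][0]
    return lasagne.init.GlorotUniform()

def get_b(network_weights, name):
    if network_weights and name in network_weights:
        return network_weights[name][1]
    return lasagne.init.Constant(0.)

l_in = InputLayer((None, 100))
l_fc = dense_layer(l_in, n_units=64, name='fc1', network_weights=None,
                   nonlinearity=lasagne.nonlinearities.rectify, bn=True)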
def create_network():
l = 1000
pool_size = 5
test_size1 = 13
test_size2 = 7
test_size3 = 5
kernel1 = 128
kernel2 = 128
kernel3 = 128
layer1 = InputLayer(shape=(None, 1, 4, l+1024))
layer2_1 = SliceLayer(layer1, indices=slice(0, l), axis = -1)
layer2_2 = SliceLayer(layer1, indices=slice(l, None), axis = -1)
layer2_3 = SliceLayer(layer2_2, indices = slice(0,4), axis = -2)
layer2_f = FlattenLayer(layer2_3)
layer3 = Conv2DLayer(layer2_1,num_filters = kernel1, filter_size = (4,test_size1))
layer4 = Conv2DLayer(layer3,num_filters = kernel1, filter_size = (1,test_size1))
layer5 = Conv2DLayer(layer4,num_filters = kernel1, filter_size = (1,test_size1))
layer6 = MaxPool2DLayer(layer5, pool_size = (1,pool_size))
layer7 = Conv2DLayer(layer6,num_filters = kernel2, filter_size = (1,test_size2))
layer8 = Conv2DLayer(layer7,num_filters = kernel2, filter_size = (1,test_size2))
layer9 = Conv2DLayer(layer8,num_filters = kernel2, filter_size = (1,test_size2))
layer10 = MaxPool2DLayer(layer9, pool_size = (1,pool_size))
layer11 = Conv2DLayer(layer10,num_filters = kernel3, filter_size = (1,test_size3))
layer12 = Conv2DLayer(layer11,num_filters = kernel3, filter_size = (1,test_size3))
layer13 = Conv2DLayer(layer12,num_filters = kernel3, filter_size = (1,test_size3))
layer14 = MaxPool2DLayer(layer13, pool_size = (1,pool_size))
layer14_d = DenseLayer(layer14, num_units= 256)
layer3_2 = DenseLayer(layer2_f, num_units = 128)
layer15 = ConcatLayer([layer14_d,layer3_2])
layer16 = DropoutLayer(layer15,p=0.5)
layer17 = DenseLayer(layer16, num_units=256)
network = DenseLayer(layer17, num_units= 2, nonlinearity=softmax)
return network
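A hedged training sketch for the classification network above (two-way softmax output). None of this compilation code is in the original snippet; the optimizer, learning rate and loss are assumptions, and it presumes the same from lasagne.layers import ... / from lasagne.nonlinearities import softmax imports the snippet itself relies on.

import theano
import theano.tensor as T
import lasagne

network = create_network()
input_var = T.tensor4('inputs')   # matches the (None, 1, 4, l + 1024) InputLayer
target_var = T.ivector('targets')

# Training expressions (dropout active).
prediction = lasagne.layers.get_output(network, input_var)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=1e-4)
train_fn = theano.function([input_var, target_var], loss, updates=updates)

# Evaluation expressions (dropout disabled).
test_prediction = lasagne.layers.get_output(network, input_var, deterministic=True)
predict_fn = theano.function([input_var], test_prediction)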
#random search to initialize the weights
def create_network():
l = 1000
pool_size = 5
test_size1 = 13
test_size2 = 7
test_size3 = 5
kernel1 = 128
kernel2 = 128
kernel3 = 128
layer1 = InputLayer(shape=(None, 1, 4, l+1024))
layer2_1 = SliceLayer(layer1, indices=slice(0, l), axis = -1)
layer2_2 = SliceLayer(layer1, indices=slice(l, None), axis = -1)
layer2_3 = SliceLayer(layer2_2, indices = slice(0,4), axis = -2)
layer2_f = FlattenLayer(layer2_3)
layer3 = Conv2DLayer(layer2_1,num_filters = kernel1, filter_size = (4,test_size1))
layer4 = Conv2DLayer(layer3,num_filters = kernel1, filter_size = (1,test_size1))
layer5 = Conv2DLayer(layer4,num_filters = kernel1, filter_size = (1,test_size1))
layer6 = MaxPool2DLayer(layer5, pool_size = (1,pool_size))
layer7 = Conv2DLayer(layer6,num_filters = kernel2, filter_size = (1,test_size2))
layer8 = Conv2DLayer(layer7,num_filters = kernel2, filter_size = (1,test_size2))
layer9 = Conv2DLayer(layer8,num_filters = kernel2, filter_size = (1,test_size2))
layer10 = MaxPool2DLayer(layer9, pool_size = (1,pool_size))
layer11 = Conv2DLayer(layer10,num_filters = kernel3, filter_size = (1,test_size3))
layer12 = Conv2DLayer(layer11,num_filters = kernel3, filter_size = (1,test_size3))
layer13 = Conv2DLayer(layer12,num_filters = kernel3, filter_size = (1,test_size3))
layer14 = MaxPool2DLayer(layer13, pool_size = (1,pool_size))
layer14_d = DenseLayer(layer14, num_units= 256)
layer3_2 = DenseLayer(layer2_f, num_units = 128)
layer15 = ConcatLayer([layer14_d,layer3_2])
#layer16 = DropoutLayer(layer15,p=0.5)
layer17 = DenseLayer(layer15, num_units=256)
network = DenseLayer(layer17, num_units= 1, nonlinearity=None)
return network
#random search to initialize the weights
def build_tempral_model():
net={}
net['input']=InputLayer((None,24,2048))
net['lstm1']=LSTMLayer(net['input'],256)
net['fc']=DenseLayer(net['lstm1'],num_units=12,nonlinearity=sigmoid)
return net
def build_model():
net = {}
net['input'] = InputLayer((None, 512*20, 3, 3))
au_fc_layers=[]
for i in range(20):
net['roi_AU_N_'+str(i)]=SliceLayer(net['input'],indices=slice(i*512,(i+1)*512),axis=1)
# Try adding upsampling here to allow more convolutions
net['Roi_upsample_'+str(i)]=Upscale2DLayer(net['roi_AU_N_'+str(i)],scale_factor=2)
net['conv_roi_'+str(i)]=ConvLayer(net['Roi_upsample_'+str(i)],512,3)
net['au_fc_'+str(i)]=DenseLayer(net['conv_roi_'+str(i)],num_units=150)
au_fc_layers+=[net['au_fc_'+str(i)]]
#
net['local_fc']=concat(au_fc_layers)
net['local_fc2']=DenseLayer(net['local_fc'],num_units=2048)
net['local_fc_dp']=DropoutLayer(net['local_fc2'],p=0.5)
# net['fc_comb']=concat([net['au_fc_layer'],net['local_fc_dp']])
# net['fc_dense']=DenseLayer(net['fc_comb'],num_units=1024)
# net['fc_dense_dp']=DropoutLayer(net['fc_dense'],p=0.3)
net['real_out']=DenseLayer(net['local_fc_dp'],num_units=12,nonlinearity=sigmoid)
# net['final']=concat([net['pred_pos_layer'],net['output_layer']])
return net
def getTrainedRNN():
'''Read the trained RNN parameters from file and set them (TODO: refactor so this is done only once).'''
input_size = 39
hidden_size = 50
num_output_classes = 29
learning_rate = 0.001
output_size = num_output_classes+1
batch_size = None
input_seq_length = None
gradient_clipping = 5
l_in = InputLayer(shape=(batch_size, input_seq_length, input_size))
n_batch, n_time_steps, n_features = l_in.input_var.shape #Unnecessary in this version. Just collecting the info so that we can reshape the output back to the original shape
# h_1 = DenseLayer(l_in, num_units=hidden_size, nonlinearity=clipped_relu)
l_rec_forward = RecurrentLayer(l_in, num_units=hidden_size, grad_clipping=gradient_clipping, nonlinearity=clipped_relu)
l_rec_backward = RecurrentLayer(l_in, num_units=hidden_size, grad_clipping=gradient_clipping, nonlinearity=clipped_relu, backwards=True)
l_rec_accumulation = ElemwiseSumLayer([l_rec_forward,l_rec_backward])
l_rec_reshaped = ReshapeLayer(l_rec_accumulation, (-1,hidden_size))
l_h2 = DenseLayer(l_rec_reshaped, num_units=hidden_size, nonlinearity=clipped_relu)
l_out = DenseLayer(l_h2, num_units=output_size, nonlinearity=lasagne.nonlinearities.linear)
l_out_reshaped = ReshapeLayer(l_out, (n_batch, n_time_steps, output_size))#Reshaping back
l_out_softmax = NonlinearityLayer(l_out, nonlinearity=lasagne.nonlinearities.softmax)
l_out_softmax_reshaped = ReshapeLayer(l_out_softmax, (n_batch, n_time_steps, output_size))
with np.load('CTC_model.npz') as f:
param_values = [f['arr_%d' % i] for i in range(len(f.files))]
lasagne.layers.set_all_param_values(l_out_softmax_reshaped, param_values, trainable = True)
output = lasagne.layers.get_output( l_out_softmax_reshaped )
return l_in, output
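A hedged sketch of turning the symbolic output returned by getTrainedRNN() into a callable prediction function. The feature shape (batch, time, 39) follows input_size above, and CTC_model.npz must exist on disk for the call to succeed; the random batch is a placeholder.

import numpy as np
import theano

l_in, rnn_output = getTrainedRNN()
predict_fn = theano.function([l_in.input_var], rnn_output)

# Hypothetical batch: 2 utterances of 100 frames with 39 features each.
features = np.random.rand(2, 100, 39).astype(theano.config.floatX)
posteriors = predict_fn(features)  # (2, 100, 30): softmax over 29 classes plus blank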
def getTrainedCLM():
''' Read CLM from file '''
#Some parameters for the CLM
INPUT_SIZE = 29
#Hidden layer hyper-parameters
N_HIDDEN = 100
HIDDEN_NONLINEARITY = 'rectify'
#Gradient clipping
GRAD_CLIP = 100
l_in = lasagne.layers.InputLayer(shape=(None, None, INPUT_SIZE)) # One-hot representation of character indices
l_mask = lasagne.layers.InputLayer(shape = (None, None))
l_recurrent = lasagne.layers.RecurrentLayer(incoming = l_in, num_units=N_HIDDEN, mask_input = l_mask, learn_init=True, grad_clipping=GRAD_CLIP)
Recurrent_output=lasagne.layers.get_output(l_recurrent)
n_batch, n_time_steps, n_features = l_in.input_var.shape
l_reshape = lasagne.layers.ReshapeLayer(l_recurrent, (-1, N_HIDDEN))
Reshape_output = lasagne.layers.get_output(l_reshape)
l_h1 = lasagne.layers.DenseLayer(l_reshape, num_units=N_HIDDEN)
l_h2 = lasagne.layers.DenseLayer(l_h1, num_units=N_HIDDEN)
l_dense = lasagne.layers.DenseLayer(l_h2, num_units=INPUT_SIZE, nonlinearity = lasagne.nonlinearities.softmax)
with np.load('CLM_model.npz') as f:
param_values = [f['arr_%d' % i] for i in range(len(f.files))]
lasagne.layers.set_all_param_values(l_dense, param_values,trainable = True)
output = lasagne.layers.get_output( l_dense )
return l_in,l_mask,output
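The same idea for the character language model: because the recurrent layer takes a mask input, the compiled function needs both the one-hot character tensor and the mask. The sequence length and random characters below are placeholders; only INPUT_SIZE = 29 comes from the snippet above, and CLM_model.npz must exist on disk.

import numpy as np
import theano

l_in, l_mask, clm_output = getTrainedCLM()
clm_fn = theano.function([l_in.input_var, l_mask.input_var], clm_output)

one_hot = np.zeros((1, 20, 29), dtype=theano.config.floatX)  # 1 sequence, 20 characters
one_hot[0, np.arange(20), np.random.randint(0, 29, 20)] = 1.0
mask = np.ones((1, 20), dtype=theano.config.floatX)
char_probs = clm_fn(one_hot, mask)  # (1 * 20, 29) after the ReshapeLayer flattening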
#def getCLMOneHot( sequence ):