def adamax_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    for p, g in zip(params, grads):
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        if mom1 > 0:
            v_t = mom1*v + (1. - mom1)*g
            updates.append((v, v_t))
        else:
            v_t = g
        mg_t = T.maximum(mom2*mg, abs(g))
        g_t = v_t / (mg_t + 1e-6)
        p_t = p - lr * g_t
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    return updates
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates
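
# Hedged usage sketch for `adam_updates` (and `adamax_updates`) above: the returned
# list of (shared_variable, new_value) pairs is passed straight to theano.function.
# The toy least-squares model below is illustrative only, not from the original source.
import numpy as np
import theano as th
import theano.tensor as T

x = T.matrix('x')
y = T.vector('y')
w = th.shared(np.zeros(5, dtype=th.config.floatX), name='w')
cost = T.mean(T.sqr(T.dot(x, w) - y))
train_fn = th.function([x, y], cost, updates=adam_updates([w], cost, lr=1e-3))
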
def compile(self, s_inputs_, s_loss_, v_params_, s_grads_=None, s_reg_=0, fetches_=None, updates_=None, givens_=None, trunc_grad_=None, profile_=False):
    def get_shared_shape(v):
        return v.get_value(borrow=True, return_internal_type=True).shape
    if type(s_inputs_) not in (list, tuple):
        s_inputs_ = [s_inputs_]
    if isinstance(updates_, dict):
        updates_ = list(updates_.items())
    super(AdamSGD, self).compile(
        s_inputs_, s_loss_, v_params_, s_reg_=s_reg_, s_grads_=s_grads_, trunc_grad_=trunc_grad_)
    self.v_m = [th.shared(value=np.zeros(get_shared_shape(p), th.config.floatX), name=('adam_m_'+p.name if p.name is not None else None)) for p in v_params_]
    self.v_v = [th.shared(value=np.zeros(get_shared_shape(p), th.config.floatX), name=('adam_v_'+p.name if p.name is not None else None)) for p in v_params_]
    s_b1 = T.scalar('adam_b1'); s_b2 = T.scalar('adam_b2')
    s_b1s = T.scalar('adam_b1s'); s_b2s = T.scalar('adam_b2s')
    update_m = [(m, (m*s_b1 + (1.-s_b1)*g)) for m, g in zip(self.v_m, self.s_grads)]
    update_v = [(v, (v*s_b2 + (1.-s_b2)*g*g)) for v, g in zip(self.v_v, self.s_grads)]
    apply_grad = [(p, p-(s_b1s*m*self.s_lr)/(T.sqrt(s_b2s*v)+self.eps)) for p, m, v in zip(v_params_, self.v_m, self.v_v)]
    self.fn_train = th.function(
        inputs=[self.s_lr]+s_inputs_+[s_b1, s_b2, s_b1s, s_b2s],
        outputs=fetches_,
        updates=update_m+update_v+apply_grad+(updates_ if updates_ else []),
        on_unused_input='warn',
        givens=givens_, profile=profile_)
    self.fn_rst = th.function(inputs=[], updates=[(v, T.zeros_like(v)) for v in self.v_m+self.v_v], profile=profile_)
    return self.fn_train
def get_costs(self, probs, y, y_mask,
              decay_cs=None, opt_rets=None):
    """
    probs : dict, mapping cg_name to probabilities
    y : theano tensor variable
    y_mask : theano tensor variable
    decay_cs : dict, mapping cg_name to l2 regularization weights
    opt_rets : dict, mapping cg_name to optional returned variables
    """
    costs = self.decoder.costs(probs, y, y_mask)
    if decay_cs is not None:
        for name, cost in costs.iteritems():
            if decay_cs[name] > 0.:
                decay_c = theano.shared(numpy.float32(decay_cs[name]),
                                        name='decay_c')
                weight_decay = 0.
                for pp in ComputationGraph(cost).parameters:
                    weight_decay += (pp ** 2).sum()
                weight_decay *= decay_c
                costs[name] += weight_decay
                costs[name].name = name
    return costs
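
# Hedged sketch of the weight-decay step inside `get_costs`, written without the
# Blocks ComputationGraph dependency: an L2 penalty scaled by a shared `decay_c`
# is added to a cost. All names below are illustrative, not from the original source.
import numpy
import theano
import theano.tensor as T

W = theano.shared(numpy.zeros((10, 10), dtype=theano.config.floatX), name='W')
cost = T.sqr(W).sum()                                  # stand-in for a model cost that uses W
decay_c = theano.shared(numpy.float32(1e-4), name='decay_c')
cost = cost + decay_c * (W ** 2).sum()                 # add the scaled L2 weight decay
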
def param(name, *args, **kwargs):
    """
    A wrapper for `theano.shared` which enables parameter sharing in models.

    Creates and returns Theano shared variables similarly to `theano.shared`,
    except that if you try to create a param with the same name as a
    previously created one, `param(...)` will just return the old one instead
    of making a new one.

    This constructor also adds a `param` attribute to the shared variables it
    creates, so that you can easily search a graph for all params.
    """
    if name not in _params:
        kwargs['name'] = name
        param = theano.shared(*args, **kwargs)
        param.param = True
        _params[name] = param
    return _params[name]
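
# Hedged usage sketch for the `param` wrapper above; it assumes the module-level
# registry `_params = {}` referenced in the function body. The names below are
# illustrative only.
import numpy as np
import theano

_params = {}
W1 = param('fc1.W', np.zeros((784, 256), dtype=theano.config.floatX))
W2 = param('fc1.W', np.ones((784, 256), dtype=theano.config.floatX))
assert W1 is W2                        # second call reuses the first shared variable
assert getattr(W1, 'param', False)     # tagged so a graph can be searched for params
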
def adadelta(parameters, gradients, rho=0.95, eps=1e-6):
    """ Reference: ADADELTA: An Adaptive Learning Rate Method,
        Zeiler 2012. https://arxiv.org/abs/1212.5701
        Adapted from the Adadelta implementation in TensorFlow.
    """
    accum = [theano.shared(numpy.zeros(p.get_value().shape, floatX)) for p in parameters]
    accum_updates = [theano.shared(numpy.zeros(p.get_value().shape, floatX)) for p in parameters]

    new_accum = [rho * g0 + (1.0 - rho) * (g ** 2) for g0, g in izip(accum, gradients)]
    updates = [tensor.sqrt(d0 + eps) / tensor.sqrt(g0 + eps) * g for d0, g0, g in izip(accum_updates,
                                                                                       new_accum,
                                                                                       gradients)]
    new_accum_updates = [rho * d0 + (1.0 - rho) * (d ** 2) for d0, d in izip(accum_updates,
                                                                             updates)]

    accum_ = zip(accum, new_accum)
    accum_updates_ = zip(accum_updates, new_accum_updates)
    parameters_ = [(p, (p - d)) for p, d in izip(parameters, updates)]
    return accum_ + accum_updates_ + parameters_
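
# Hedged usage sketch for the `adadelta` helper above. It relies on module-level
# names (`numpy`, `tensor`, `floatX`, `izip`); the aliases below are assumptions made
# only so the example is self-contained (Python 2, matching the snippet's izip).
import numpy
import theano
import theano.tensor as tensor
from itertools import izip             # Python 2, as in the snippet above
floatX = theano.config.floatX

x = tensor.matrix('x')
w = theano.shared(numpy.zeros((3, 1), floatX), name='w')
loss = tensor.sqr(tensor.dot(x, w) - 1.0).sum()
grads = tensor.grad(loss, [w])
train_step = theano.function([x], loss, updates=adadelta([w], grads))
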
def addData(self, data):
    """
    Set additional data of the network that is not managed within training iterations,
    e.g. validation data or other small data sets
    :param data: data and labels specified as a dictionary
    :return: None
    """
    if not isinstance(data, dict):
        raise ValueError("Error: expected dictionary for data!")

    for key in data:
        # no need to cache validation data
        setattr(self, key+'DB', self.alignData(data[key]))
        # shared variable already exists?
        if hasattr(self, key):
            print("Reusing shared variables!")
            getattr(self, key).set_value(getattr(self, key+'DB'), borrow=True)
        else:
            # create shared data
            setattr(self, key, theano.shared(getattr(self, key+'DB'), name=key, borrow=True))
def addStaticData(self, data):
    """
    Set additional static data of the network that is not managed within training iterations,
    e.g. validation data or other small data sets
    :param data: data and labels specified as a dictionary
    :return: None
    """
    if not isinstance(data, dict):
        raise ValueError("Error: expected dictionary for data!")

    for key in data:
        # no need to cache validation data
        setattr(self, key+'DB', data[key])
        # shared variable already exists?
        if hasattr(self, key):
            print("Reusing shared variables!")
            getattr(self, key).set_value(getattr(self, key+'DB'), borrow=True)
        else:
            # create shared data
            setattr(self, key, theano.shared(getattr(self, key+'DB'), name=key, borrow=True))
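
# Hedged sketch of the create-or-reuse pattern that addData/addStaticData implement:
# the first call creates a Theano shared variable, later calls only push new values
# into it with set_value, so already-compiled functions keep working. `net` and the
# attribute name are illustrative placeholders, not from the original source.
import numpy as np
import theano

class Net(object):
    pass

net = Net()
val_x = np.random.rand(500, 32).astype(theano.config.floatX)
if hasattr(net, 'val_data_x'):
    net.val_data_x.set_value(val_x, borrow=True)       # reuse the existing shared variable
else:
    net.val_data_x = theano.shared(val_x, name='val_data_x', borrow=True)
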
def replaceTrainingData(self, start_idx, end_idx, last=False):
    """
    Replace the shared data of the training data
    :param start_idx: start index of data
    :param end_idx: end index of data
    :param last: specify if it is the last macro-batch
    :return: None
    """
    for var in self.managedVar:
        if not hasattr(self, var):
            raise ValueError("Variable " + var + " not defined!")
        if last is True:
            getattr(self, var).set_value(getattr(self, var+'DBlast')[start_idx:end_idx], borrow=True)
        else:
            getattr(self, var).set_value(getattr(self, var+'DB')[start_idx:end_idx], borrow=True)
def loadMacroBatch(self, macro_idx):
    """
    Make sure that the given macro-batch is loaded in the shared variable
    :param macro_idx: macro-batch index
    :return: None
    """
    if macro_idx != self.currentMacroBatch:
        # last macro batch is handled separately, as it is padded
        if self.isLastMacroBatch(macro_idx):
            start_idx = 0
            end_idx = self.getNumSamplesPerMacroBatch()
            print("Loading last macro batch {}, start idx {}, end idx {}".format(macro_idx, start_idx, end_idx))
            self.replaceTrainingData(start_idx, end_idx, last=True)
            # remember current macro batch index
            self.currentMacroBatch = macro_idx
        else:
            start_idx = macro_idx * self.getNumSamplesPerMacroBatch()
            end_idx = min((macro_idx + 1) * self.getNumSamplesPerMacroBatch(), self.train_data_xDB.shape[0])
            print("Loading macro batch {}, start idx {}, end idx {}".format(macro_idx, start_idx, end_idx))
            self.replaceTrainingData(start_idx, end_idx)
            # remember current macro batch index
            self.currentMacroBatch = macro_idx
def query_variable(self, query_):
    '''
    Return an iterable of shared variables found by query_, from the current group.

    query_:
        Can take several forms, as shown below.
        All: return all variables under the current group.
        string: treated as a regex; return variables whose names fully match the regex.
    '''
    if query_ is All:
        return self._current_group_di.values()
    elif isinstance(query_, str):
        regex = re.compile(query_)
        return {k: v for k, v in self._current_group_di.items() if regex.fullmatch(k)}
    else:
        raise TypeError('Unknown query type "%s"' % type(query_))

# TODO: add/delete group does not consider non-group objects for now
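
# Hedged sketch of the regex-query pattern used by `query_variable`, shown on a plain
# dict of shared variables instead of the class's group registry. Names are illustrative.
import re
import numpy as np
import theano

group = {'enc_W': theano.shared(np.zeros((4, 4), dtype=theano.config.floatX)),
         'enc_b': theano.shared(np.zeros(4, dtype=theano.config.floatX)),
         'dec_W': theano.shared(np.zeros((4, 4), dtype=theano.config.floatX))}
regex = re.compile(r'enc_.*')
encoder_vars = {k: v for k, v in group.items() if regex.fullmatch(k)}   # enc_W, enc_b
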
def sgd_optimizer(model, lr=0.001, momentum=0.9):
    lr = theano.shared(np.array(lr).astype(theano.config.floatX))
    # Make sure momentum is a sane value
    assert momentum < 1 and momentum >= 0
    # the updates of SGD with momentum
    updates = []
    grads = T.grad(model.costs[0], model.params)
    for param, grad in zip(model.params, grads):
        param_update = theano.shared(param.get_value()*0.)
        updates.append((param, param - lr * param_update))
        updates.append((param_update, momentum*param_update + (1. - momentum)*grad))
    train_func = theano.function(model.inputs, model.costs, updates=updates)
    valid_func = theano.function(model.inputs, model.costs)
    return train_func, valid_func
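
# Hedged usage sketch for `sgd_optimizer`: the `model` argument is assumed to be any
# object exposing `inputs` (list of Theano variables), `params` (list of shared
# variables) and `costs` (list of scalar expressions), as the code above expects.
# The toy model below is illustrative, not from the original source.
import numpy as np
import theano
import theano.tensor as T

class ToyModel(object):
    def __init__(self):
        x = T.matrix('x'); y = T.vector('y')
        self.params = [theano.shared(np.zeros(8, dtype=theano.config.floatX), name='w')]
        cost = T.mean(T.sqr(T.dot(x, self.params[0]) - y))
        self.inputs, self.costs = [x, y], [cost]

train_fn, valid_fn = sgd_optimizer(ToyModel(), lr=0.01, momentum=0.9)
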
def get_cost(aes, l, eye=True):
    """Get the sum of all the reconstruction costs of the AEs.
    Input:
        aes: list. List of all the AEs.
        l: shared variable or a list of shared variables for the importance
           weights.
    """
    costs = []
    for i, ae in enumerate(aes):
        if isinstance(ae, ConvolutionalAutoencoder):
            costs.append(l[i] * ae.get_train_cost()[0])
        else:
            costs.append(l[i] * ae.get_train_cost(face=eye)[0])
    cost = None
    if costs:
        cost = reduce(lambda x, y: x + y, costs)
    return cost
def evaluate_model(list_minibatchs_vl, eval_fn):
    """Evaluate the model over a set."""
    error, output = None, None
    for mn_vl in list_minibatchs_vl:
        x = theano.shared(
            mn_vl['x'], borrow=True).get_value(borrow=True)
        y = theano.shared(
            mn_vl['y'], borrow=True).get_value(borrow=True)
        [error_mn, output_mn] = eval_fn(x, y)
        if error is None:
            error = error_mn
            output = output_mn
        else:
            error = np.vstack((error, error_mn))
            output = np.vstack((output, output_mn))
    return error, output
def evaluate_model_3D_unsup(list_minibatchs_vl, eval_fn):
    """Evaluate the model over a set."""
    error, output, code = None, None, None
    for mn_vl in list_minibatchs_vl:
        x = theano.shared(
            mn_vl['x'], borrow=True).get_value(borrow=True)
        [error_mn, output_mn, code_mn] = eval_fn(x)
        if error is None:
            error = error_mn
            output = output_mn
            code = code_mn
        else:
            error = np.vstack((error, error_mn))
            output = np.vstack((output, output_mn))
            code = np.vstack((code, code_mn))
    return error, output, code
def shared_dataset(self, data_xy, train=False, borrow=True):
    """Load the data into Theano shared variables.

    The data is copied only once to the shared memory on the GPU.
    """
    data_x, data_y = data_xy
    if train:
        dim_output = 10  # case of MNIST
        data_y = np.int32(self.labels(data_y, dim_output))
    shared_x = theano.shared(
        np.asarray(data_x, dtype=theano.config.floatX),
        borrow=borrow)
    shared_y = theano.shared(
        np.asarray(data_y, dtype=theano.config.floatX),
        borrow=borrow)
    return shared_x, T.cast(shared_y, 'int32')
def load_data(self, dataset_path, share=False):
    """Load the data set."""
    f = gzip.open(dataset_path, 'rb')
    train_set, valid_set, test_set = pickle.load(f)
    f.close()
    # share the data
    train_set_x, train_set_y = self.shared_dataset(train_set, train=True)
    valid_set_x, valid_set_y = self.shared_dataset(valid_set)
    test_set_x, test_set_y = self.shared_dataset(test_set)
    if share:
        reval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y)]
    else:
        reval = [train_set, valid_set, test_set]  # NON-shared data (the original Crino code did not share the data)
    return reval
def shared_dataset_xy(self, data_xy, nlabels=10, train=False, task="cls", borrow=True):
    """Load the data into Theano shared variables.

    The data is copied only once to the shared memory on the GPU.
    """
    data_x, data_y = data_xy
    if train and (task == 'cls'):
        data_y = np.int32(self.labels(data_y, nlabels))
    shared_x = theano.shared(
        np.asarray(data_x, dtype=theano.config.floatX),
        borrow=borrow)
    shared_y = theano.shared(
        np.asarray(data_y, dtype=theano.config.floatX),
        borrow=borrow)
    return shared_x, T.cast(shared_y, 'int32')
def adadelta(tparams, grads, x, y, mask, lengths, cost):
    zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % k) for k, p in tparams.iteritems()]
    running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rup2' % k) for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2' % k) for k, p in tparams.iteritems()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]
    f_grad_shared = theano.function([x, y, mask, lengths], cost, updates=zgup + rg2up, name='adadelta_f_grad_shared')

    updir = [-T.sqrt(ru2 + 1e-6) / T.sqrt(rg2 + 1e-6) * zg for zg, ru2, rg2 in zip(zipped_grads, running_up2, running_grads2)]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)]
    f_update = theano.function([], [], updates=ru2up + param_up, on_unused_input='ignore', name='adadelta_f_update')

    return f_grad_shared, f_update
def adadelta(tparams, grads, weightVector, iVector, jVector, cost):
    zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % k) for k, p in tparams.iteritems()]
    running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rup2' % k) for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2' % k) for k, p in tparams.iteritems()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]
    f_grad_shared = theano.function([weightVector, iVector, jVector], cost, updates=zgup + rg2up, name='adadelta_f_grad_shared')

    updir = [-T.sqrt(ru2 + 1e-6) / T.sqrt(rg2 + 1e-6) * zg for zg, ru2, rg2 in zip(zipped_grads, running_up2, running_grads2)]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)]
    f_update = theano.function([], [], updates=ru2up + param_up, on_unused_input='ignore', name='adadelta_f_update')

    return f_grad_shared, f_update
def shared_dropout_layer(shape, use_noise, trng, value, scaled=True):
    # re-scale dropout at training time, so we don't need to at test time
    if scaled:
        proj = tensor.switch(
            use_noise,
            trng.binomial(shape, p=value, n=1,
                          dtype='float32')/value,
            theano.shared(numpy.float32(1.)))
    else:
        proj = tensor.switch(
            use_noise,
            trng.binomial(shape, p=value, n=1,
                          dtype='float32'),
            theano.shared(numpy.float32(value)))
    return proj
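
# Hedged usage sketch for `shared_dropout_layer`: `use_noise` is a shared flag that is
# set to 1. during training and back to 0. at evaluation time, and `trng` is an MRG
# random stream. The variable names below are assumptions, not from the original source.
import numpy
import theano
import theano.tensor as tensor
from theano.sandbox.rng_mrg import MRG_RandomStreams

trng = MRG_RandomStreams(1234)
use_noise = theano.shared(numpy.float32(0.))
h = tensor.matrix('h', dtype='float32')
retain_p = 0.8                                          # probability of keeping a unit
mask = shared_dropout_layer((h.shape[0], h.shape[1]), use_noise, trng, retain_p)
h_dropped = h * mask

use_noise.set_value(1.)   # enable dropout for training; set back to 0. for decoding
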
# feedforward layer: affine transformation + point-wise nonlinearity
def mdclW(num_filters, num_channels, filter_size, winit, name, scales):
    # Coefficient Initializer
    sinit = lasagne.init.Constant(1.0/(1+len(scales)))
    # Total filter size
    size = filter_size + (filter_size-1)*(scales[-1]-1)
    # Multiscale Dilated Filter
    W = T.zeros((num_filters, num_channels, size, size))
    # Undilated Base Filter
    baseW = theano.shared(lasagne.utils.floatX(winit.sample((num_filters, num_channels, filter_size, filter_size))), name=name+'.W')
    for scale in scales[::-1]:  # iterate backwards so that we place the main filter on top
        W = T.set_subtensor(W[:, :, scales[-1]-scale:size-scales[-1]+scale:scale, scales[-1]-scale:size-scales[-1]+scale:scale],
                            baseW*theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'.coeff_'+str(scale)).dimshuffle(0, 'x', 'x', 'x'))
    return W
# Subpixel Upsample Layer from https://arxiv.org/abs/1609.05158
# This layer uses a set of r^2 set_subtensor calls to reorganize the tensor in a
# subpixel-layer upscaling style, as done in the ESPCN Magic Pony paper for super-resolution.
# r is the upscale factor.
# c is the number of output channels.
def load_weights(params, path, num_conv):
    print 'Loading gan weights from ' + path
    with h5py.File(path, 'r') as hdf5:
        params['skipthought2image'] = theano.shared(np.copy(hdf5['skipthought2image']))
        params['skipthought2image-bias'] = theano.shared(np.copy(hdf5['skipthought2image-bias']))
        for i in xrange(num_conv):
            params['W_conv{}'.format(i)] = theano.shared(np.copy(hdf5['W_conv{}'.format(i)]))
            params['b_conv{}'.format(i)] = theano.shared(np.copy(hdf5['b_conv{}'.format(i)]))

            # Flip w,h axes
            params['W_conv{}'.format(i)] = params['W_conv{}'.format(i)][:, :, ::-1, ::-1]

            w = np.abs(np.copy(hdf5['W_conv{}'.format(i)]))
            print 'W_conv{}'.format(i), np.min(w), np.mean(w), np.max(w)
            b = np.abs(np.copy(hdf5['b_conv{}'.format(i)]))
            print 'b_conv{}'.format(i), np.min(b), np.mean(b), np.max(b)
    return params
def __init__(self, dimX, dimReadAttent, dimWriteAttent, dimRNNEnc, dimRNNDec, dimZ, runSteps, inputData, valData=None, testData=None, pathToWeights=None):
    self.dimX = dimX
    self.dimReadAttent = dimReadAttent
    self.dimWriteAttent = dimWriteAttent
    self.dimRNNEnc = dimRNNEnc
    self.dimRNNDec = dimRNNDec
    self.dimZ = dimZ
    self.runSteps = runSteps
    self.pathToWeights = pathToWeights

    self.n_batches = inputData.shape[0] / batch_size
    self.train_data = theano.shared(inputData)
    del inputData

    if valData is not None:
        self.n_val_batches = valData.shape[0] / batch_size
        self.val_data = theano.shared(valData)
        del valData

    if testData is not None:
        self.n_test_batches = testData.shape[0] / batch_size
        self.test_data = theano.shared(testData)
        del testData

    self._kl_final, self._logpxz, self._log_likelihood, self._c_ts, self._c_ts_gener, self._x, self._run_steps, self._updates_train, self._updates_gener, self._read_attent_params, self._write_attent_params, self._write_attent_params_gener, self._params = build_lstm_attention_vae(self.dimX, self.dimReadAttent, self.dimWriteAttent, self.dimRNNEnc, self.dimRNNDec, self.dimZ, self.runSteps, self.pathToWeights)
def shared_dataset_x(data_x, borrow=True):
    """ Function that loads the dataset into shared variables

    The reason we store our dataset in shared variables is to allow
    Theano to copy it into the GPU memory (when code is run on GPU).
    Since copying data into the GPU is slow, copying a minibatch every time
    it is needed (the default behaviour if the data is not in a shared
    variable) would lead to a large decrease in performance.
    """
    shared_x = theano.shared(numpy.asarray(data_x,
                                           dtype=theano.config.floatX),
                             borrow=borrow)
    # When storing data on the GPU it has to be stored as floats, hence the cast
    # to ``floatX`` above. (The ``shared_dataset`` variant below stores the label
    # array the same way and casts it back to int with ``T.cast``, since labels
    # are used as indices and floats would not make sense there.)
    return shared_x
def __init__(self, rng, n_in, n_out, minibatch_size):
    super(GRULayer, self).__init__()
    # Notation from: An Empirical Exploration of Recurrent Network Architectures
    self.n_in = n_in
    self.n_out = n_out

    # Initial hidden state
    self.h0 = theano.shared(value=np.zeros((minibatch_size, n_out)).astype(theano.config.floatX), name='h0', borrow=True)

    # Gate parameters:
    self.W_x = weights_Glorot(n_in, n_out*2, 'W_x', rng)
    self.W_h = weights_Glorot(n_out, n_out*2, 'W_h', rng)
    self.b = weights_const(1, n_out*2, 'b', 0)
    # Input parameters
    self.W_x_h = weights_Glorot(n_in, n_out, 'W_x_h', rng)
    self.W_h_h = weights_Glorot(n_out, n_out, 'W_h_h', rng)
    self.b_h = weights_const(1, n_out, 'b_h', 0)

    self.params = [self.W_x, self.W_h, self.b, self.W_x_h, self.W_h_h, self.b_h]
def setParams(self, W_IN, b_IN):
    # check the dimensions of the matrices passed in
    if (W_IN.shape[0] == self.W.shape.eval()[0] and
            W_IN.shape[1] == self.W.shape.eval()[1] and
            len(b_IN) == self.b.shape.eval()[0]):
        self.W.set_value(W_IN)
        self.b.set_value(b_IN)
        #self.W = theano.shared(value=W_IN, name='W', borrow=True)
        # initialize the biases b as a vector of n_out 0s
        #self.b = theano.shared(value=b_IN, name='b', borrow=True)
    else:
        print "NEW_logistic_sgd: dimension mismatch in the matrices passed in"
        print "W(input) shape", W_IN.shape, "W shape", self.W.shape.eval()
        print "b(input) shape", len(b_IN), "b shape", self.b.shape.eval()
def shared_dataset(data_x, data_y, borrow=True):
    """ Function that loads the dataset into shared variables

    The reason we store our dataset in shared variables is to allow
    Theano to copy it into the GPU memory (when code is run on GPU).
    Since copying data into the GPU is slow, copying a minibatch every time
    it is needed (the default behaviour if the data is not in a shared
    variable) would lead to a large decrease in performance.
    """
    shared_x = theano.shared(numpy.asarray(data_x,
                                           dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y,
                                           dtype=theano.config.floatX),
                             borrow=borrow)
    return shared_x, T.cast(shared_y, 'int32')
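
# Hedged usage sketch of the shared-dataset + `givens` pattern these helpers support:
# minibatches are taken by symbolic slicing into the GPU-resident shared arrays, so no
# host-to-GPU copy happens at each training step. The model cost is a stand-in and all
# names are illustrative, not from the original source.
import numpy
import theano
import theano.tensor as T

data_x = numpy.random.rand(1000, 20)
data_y = numpy.random.randint(0, 10, size=1000)
shared_x, shared_y = shared_dataset(data_x, data_y)

index = T.lscalar('index')
batch_size = 50
x = T.matrix('x'); y = T.ivector('y')
cost = T.sum(x) + T.sum(y)          # stand-in for a real model cost built from x and y
train_fn = theano.function(
    [index], cost,
    givens={x: shared_x[index * batch_size:(index + 1) * batch_size],
            y: shared_y[index * batch_size:(index + 1) * batch_size]})
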