def dropout_layer(layer, p_dropout):
    srng = shared_randomstreams.RandomStreams(
        np.random.RandomState(0).randint(999999))
    mask = srng.binomial(n=1, p=1-p_dropout, size=layer.shape)
    return layer*T.cast(mask, theano.config.floatX)
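A minimal usage sketch for the helper above, assuming the standard imports shown below (the input matrix and the 0.5 dropout rate are illustrative):

import numpy as np
import theano
import theano.tensor as T
from theano.tensor import shared_randomstreams

x = T.matrix('x')                 # symbolic minibatch of activations
dropped = dropout_layer(x, 0.5)   # each unit is kept with probability 1 - 0.5
f = theano.function([x], dropped)
print(f(np.ones((2, 4), dtype=theano.config.floatX)))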
def __init__(self, **kwargs):
    super(TheanoBackend, self).__init__(**kwargs)
    self.rng = RandomStreams(self._seed)
    theano.config.floatX = _FLOATX

def reset_random_state(self):
    self.rng = RandomStreams(self._seed)

# TENSOR CREATION
def __init__(self, rng, name, is_train, x, n_in, n_out, W=None, b=None, activation=ReLU, p=0.5):
"""p is the probability of NOT dropping out a unit"""
self.name = name
self.x = x
bound = np.sqrt(6./(n_in+n_out))
if W is None:
W_values = np.asarray(
rng.uniform(
low=-bound,
high=bound,
size=(n_in, n_out)
),
dtype=theano.config.floatX)
if activation == theano.tensor.nnet.sigmoid:
W_values *= 4
W = theano.shared(value=W_values, name='W', borrow=True)
if b is None:
# b_values = np.zeros((n_out,), dtype=theano.config.floatX)
b_values = np.ones((n_out,), dtype=theano.config.floatX) * np.cast[theano.config.floatX](bound)
b = theano.shared(value=b_values, name='b', borrow=True)
self.W = W
self.b = b
lin_output= T.dot(x, self.W) + self.b
output = (
lin_output if activation is None
else activation(lin_output))
def drop(x, rng=rng, p=p):
"""p is the probability of NOT dropping out a unit"""
srng = RandomStreams(rng.randint(999999))
mask = srng.binomial(n=1, p=p, size=x.shape, dtype=theano.config.floatX)
return x * mask
train_output = drop(np.cast[theano.config.floatX](1./p) * output)
self.output = T.switch(T.neq(is_train, 0), train_output, output)
self.params = [self.W, self.b]
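A hypothetical usage sketch for the constructor above; the wrapping class name `DropoutHiddenLayer` and the dimensions are assumptions, since the snippet only shows __init__:

import numpy as np
import theano
import theano.tensor as T

rng = np.random.RandomState(1234)
x = T.matrix('x')
is_train = T.iscalar('is_train')   # 1 during training (apply dropout), 0 at test time

# DropoutHiddenLayer is a hypothetical class built around the __init__ shown above
layer = DropoutHiddenLayer(rng, 'h1', is_train, x, n_in=784, n_out=256, p=0.5)
forward = theano.function([x, is_train], layer.output)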
def __init__(self, n_visible, n_hidden, nonlinearity="RLU"):
    self.theano_rng = RandomStreams(np.random.randint(2 ** 30))
    self.add_parameter(SizeParameter("n_visible"))
    self.add_parameter(SizeParameter("n_hidden"))
    self.add_parameter(NonLinearityParameter("nonlinearity"))
    self.n_visible = n_visible
    self.n_hidden = n_hidden
    self.parameters["nonlinearity"].set_value(nonlinearity)
def __init__(self, M, an_id):
    self.M = M
    self.id = an_id
    self.rng = RandomStreams()
def __init__(self, embedding_size: int, vocabulary_size: int, empirical_distribution, representation_size: int,
             hyperparameters: dict, encoder_type: str, name: str = "GRUSequenceSiameseEncoder", use_centroid=False):
    self.__hyperparameters = hyperparameters
    self.__name = name
    log_init_noise = self.__hyperparameters["log_init_noise"]

    self.__memory_size = representation_size
    self.__embedding_size = embedding_size
    self.__vocabulary_size = vocabulary_size
    self.__empirical_distribution = empirical_distribution
    self.__encoder_type = encoder_type

    embeddings = np.random.randn(vocabulary_size, embedding_size) * 10 ** log_init_noise
    self.__embeddings = theano.shared(embeddings.astype(theano.config.floatX), name=name + ":embeddings")
    self.__name_bias = theano.shared(np.log(empirical_distribution).astype(theano.config.floatX),
                                     name=name + ":name_bias")

    encoder_init_state = np.random.randn(representation_size) * 10 ** log_init_noise
    self.__encoder_init_state = theano.shared(encoder_init_state.astype(theano.config.floatX),
                                              name=name + ":encoder_init_state")

    self.__rng = RandomStreams()
    self.__input_sequence = T.ivector(name + ":input_sequence")

    if encoder_type == 'gru':
        self.__encoder = GRU(self.__embeddings, representation_size, embedding_size,
                             self.__hyperparameters, self.__rng, name=name + ":GRUSequenceEncoder",
                             use_centroid=use_centroid)
    elif encoder_type == 'averaging_gru':
        self.__encoder = AveragingGRU(self.__embeddings, representation_size, embedding_size,
                                      self.__hyperparameters, self.__rng,
                                      name=name + ":AveragingGRUSequenceEncoder", use_centroid=use_centroid)
    else:
        raise Exception("Unrecognized encoder type `%s`, possible options `gru` and `averaging_gru`" % encoder_type)

    self.__params = {"embeddings": self.__embeddings,
                     "encoder_init_state": self.__encoder_init_state}
    self.__params.update(self.__encoder.get_params())
def __init__(self, embedding_size: int, vocabulary_size: int, empirical_distribution, representation_size: int,
             hyperparameters: dict, encoder_type: str, name: str = "GRUSequenceSupervisedEncoder",
             use_centroid=False):
    self.__hyperparameters = hyperparameters
    self.__name = name
    log_init_noise = self.__hyperparameters["log_init_noise"]

    self.__memory_size = representation_size
    self.__embedding_size = embedding_size

    embeddings = np.random.randn(vocabulary_size, embedding_size) * 10 ** log_init_noise
    self.__embeddings = theano.shared(embeddings.astype(theano.config.floatX), name=name + ":embeddings")
    self.__name_bias = theano.shared(np.log(empirical_distribution).astype(theano.config.floatX),
                                     name=name + ":name_bias")

    encoder_init_state = np.random.randn(representation_size) * 10 ** log_init_noise
    self.__encoder_init_state = theano.shared(encoder_init_state.astype(theano.config.floatX),
                                              name=name + ":encoder_init_state")

    self.__rng = RandomStreams()
    self.__input_sequence = T.ivector(name + ":input_sequence")
    self.__output_sequence = T.ivector(name + ":output_sequence")
    self.__inverted_output_sequence = self.__output_sequence[::-1]

    if encoder_type == 'gru':
        self.__encoder = GRU(self.__embeddings, representation_size, embedding_size,
                             self.__hyperparameters, self.__rng, name=name + ":GRUSequenceEncoder",
                             use_centroid=use_centroid)
    elif encoder_type == 'averaging_gru':
        self.__encoder = AveragingGRU(self.__embeddings, representation_size, embedding_size,
                                      self.__hyperparameters, self.__rng,
                                      name=name + ":AveragingGRUSequenceEncoder", use_centroid=use_centroid)
    else:
        raise Exception("Unrecognized encoder type `%s`, possible options `gru` and `averaging_gru`" % encoder_type)

    self.__params = {"embeddings": self.__embeddings,
                     "encoder_init_state": self.__encoder_init_state}
    self.__params.update(self.__encoder.get_params())

    self.__standalone_representation = T.dvector(self.__name + ":representation_input")
def __init__(self, training_filename: str, hyperparameters: dict, combination_type='residual_with_ae'):
    self.__hyperparameters = hyperparameters
    self.__dataset_extractor = TreeDatasetExtractor(training_filename)
    self.__rng = RandomStreams()
    self.__rnn = RNN(self.__hyperparameters['memory_size'], self.__hyperparameters, self.__rng,
                     self.__dataset_extractor, combination_type=combination_type)
    self.__trainable_params = list(self.__rnn.get_params().values())

    check_hyperparameters(self.REQUIRED_HYPERPARAMETERS | self.__rnn.required_hyperparameters,
                          self.__hyperparameters)

    self.__compiled_methods = None
    self.__trained_parameters = None
def __init__(self, training_filename: str, hyperparameters: dict, combination_type='eqnet'):
    self.__hyperparameters = hyperparameters
    self.__dataset_extractor = TreeDatasetExtractor(training_filename)
    self.__rng = RandomStreams()
    self.__rnn = RNN(self.__hyperparameters['memory_size'], self.__hyperparameters, self.__rng,
                     self.__dataset_extractor, combination_type=combination_type)
    check_hyperparameters(self.REQUIRED_HYPERPARAMETERS | self.__rnn.required_hyperparameters,
                          self.__hyperparameters)

    target_embeddings = (np.random.randn(self.__hyperparameters['memory_size'],
                                         self.__dataset_extractor.num_equivalent_classes)
                         * 10 ** self.__hyperparameters["log_init_scale_embedding"])
    self.__target_embeddings = theano.shared(target_embeddings.astype(theano.config.floatX),
                                             name="target_embeddings")
    self.__target_embeddings_dropout = dropout(self.__hyperparameters['dropout_rate'], self.__rng,
                                               self.__target_embeddings, True)
    self.__target_bias = np.log(self.__dataset_extractor.training_empirical_distribution)

    self.__trainable_params = list(self.__rnn.get_params().values()) + [self.__target_embeddings]

    self.__compiled_methods = None
    self.__trained_parameters = None
def dropout(dropout_rate: float, rng: RandomStreams, parameter, use_dropout: bool):
    if use_dropout:
        mask = rng.binomial(parameter.shape, p=1. - dropout_rate, dtype=parameter.dtype)
        return parameter * mask / (1. - dropout_rate)
    else:
        return parameter
def dropout_multiple(dropout_rate: float, rng: RandomStreams, use_dropout: bool, *parameters):
    return tuple([dropout(dropout_rate, rng, p, use_dropout) for p in parameters])
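A small sketch of how these helpers might be applied to shared weight matrices; the import path, shapes, and 0.2 rate are illustrative assumptions:

import numpy as np
import theano
from theano.tensor.shared_randomstreams import RandomStreams

rng = RandomStreams(seed=0)
W1 = theano.shared(np.ones((3, 3), dtype=theano.config.floatX), name='W1')
W2 = theano.shared(np.ones((3, 3), dtype=theano.config.floatX), name='W2')

# each mask is divided by (1 - rate), so the expected activation matches the no-dropout case
W1_dropped, W2_dropped = dropout_multiple(0.2, rng, True, W1, W2)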
def get_cell_with_dropout(self, rng: RandomStreams, dropout_rate: float):
    raise NotImplementedError()
def get_cell_with_dropout(self, rng: RandomStreams, dropout_rate: float):
    with_dropout = SimpleRecurrentCell.__new__(self.__class__)
    with_dropout.__prev_hidden_to_next, with_dropout.__prediction_to_hidden = dropout_multiple(
        dropout_rate, rng, True, self.__prev_hidden_to_next, self.__prediction_to_hidden)
    with_dropout.__bias = self.__bias
    with_dropout.get_cell_with_dropout = None
    with_dropout.__name = self.__name + ":with_dropout"
    return with_dropout
def get_cell_with_dropout(self, rng: RandomStreams, dropout_rate: float):
    with_dropout = GruCell.__new__(GruCell)
    with_dropout.__w_hid, with_dropout.__w_in = dropout_multiple(
        dropout_rate, rng, True, self.__w_hid, self.__w_in)
    with_dropout.__biases = self.__biases
    with_dropout.get_cell_with_dropout = None
    with_dropout.__name = self.__name + ":with_dropout"
    with_dropout.__memory_D = self.__memory_D
    with_dropout.__grad_clip = self.__grad_clip
    return with_dropout
def __init__(self, rng, x, n_in, n_out, W = None, b = None, activation = T.tanh, p=0.0, training=0):
    n_in = int(n_in)    # ensure sizes have integer type
    n_out = int(n_out)  # ensure sizes have integer type
    self.x = x

    if p > 0.0:
        if training == 1:
            # drop each input unit with probability p; at test time the input is rescaled by (1-p) instead
            srng = RandomStreams(seed=123456)
            self.x = T.switch(srng.binomial(size=x.shape, p=1-p), x, 0)
        else:
            self.x = (1-p) * x

    # initialize the weights W from a scaled normal distribution
    if W is None:
        W_value = numpy.asarray(rng.normal(0.0, 1.0/numpy.sqrt(n_in),
                                           size=(n_in, n_out)), dtype=theano.config.floatX)
        W = theano.shared(value=W_value,
                          name='W', borrow=True)
    if b is None:
        b = theano.shared(value=numpy.zeros((n_out,),
                                            dtype=theano.config.floatX),
                          name='b', borrow=True)

    self.W = W
    self.b = b
    self.delta_W = theano.shared(value=numpy.zeros((n_in, n_out),
                                                   dtype=theano.config.floatX), name='delta_W')
    self.delta_b = theano.shared(value=numpy.zeros_like(self.b.get_value(borrow=True),
                                                        dtype=theano.config.floatX), name='delta_b')

    self.output = T.dot(self.x, self.W) + self.b
    self.output = activation(self.output)

    self.params = [self.W, self.b]
    self.delta_params = [self.delta_W, self.delta_b]
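A hypothetical instantiation of the layer above (the class name `HiddenLayer` is an assumption; only the constructor is shown in the snippet):

import numpy
import theano.tensor as T

rng = numpy.random.RandomState(0)
x = T.matrix('x')

# training=1 samples a dropout mask over the input; training=0 rescales the input by (1 - p) instead
hidden = HiddenLayer(rng, x, n_in=100, n_out=50, activation=T.tanh, p=0.2, training=1)
y = hidden.output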
def __init__(self, classifier, args, noise_dist):
    self.y = T.ivector('y')

    ## Cost function
    # Sum over minibatch instances (log ( u(w|c) / (u(w|c) + k * p_n(w)) ) + sum over noise samples ( log ( u(x|c) / ( u(x|c) + k * p_n(x) ) )))

    # Generating noise samples
    srng = RandomStreams(seed=1234)
    noise_samples = srng.choice(size=(self.y.shape[0], args.num_noise_samples), a=args.num_classes,
                                p=noise_dist, dtype='int32')

    log_noise_dist = theano.shared(np.log(noise_dist.get_value()), borrow=True)
    #log_num_noise_samples = theano.shared(math.log(args.num_noise_samples)).astype(theano.config.floatX)
    log_num_noise_samples = theano.shared(np.log(args.num_noise_samples, dtype=theano.config.floatX))

    # Data part of cost function: log ( u(w|c) / (u(w|c) + k * p_n(w)) )
    data_scores = classifier.output[T.arange(self.y.shape[0]), self.y]
    data_denom = self.logadd(data_scores, log_num_noise_samples + log_noise_dist[self.y])
    data_prob = data_scores - data_denom

    # Summation of noise part of cost function: sum over noise samples ( log ( u(x|c) / ( u(x|c) + k * p_n(x) ) ) )
    noise_mass = log_num_noise_samples + log_noise_dist[noise_samples]  # log(k) + log(p_n(x)) for all noise samples (shape: #instances x k)
    noise_scores = classifier.output[T.arange(noise_samples.shape[0]).reshape((-1, 1)), noise_samples]
    noise_denom = self.logadd(noise_scores, noise_mass)
    noise_prob_sum = T.sum(noise_mass - noise_denom, axis=1)

    self.cost = (
        -T.mean(data_prob + noise_prob_sum)
    )
    self.test = (
        T.sum(data_scores)
    )
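The cost above relies on a `logadd` method that is not part of this snippet; a numerically stable element-wise version might look like the following sketch (an assumption about the original helper, defined on the same class):

def logadd(self, a, b):
    # log(exp(a) + exp(b)) computed stably by factoring out the larger term
    m = T.maximum(a, b)
    return m + T.log1p(T.exp(T.minimum(a, b) - m))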
def CTC_train(self):
    CTC_LOSSs = T.cast(T.mean(self.CTC_LOSS(), axis=0), "float32")
    train_data_d = []
    train_data_m = []
    train_data_m_s = []
    learning_rate = T.scalar()
    decay = T.scalar()
    seed = np.random.randint(10e6)
    rng = RandomStreams(seed=seed)
    grad_rate = 0.8
    for data in self.train_data:
        data_d = rng.binomial((1,), p=grad_rate, dtype="float32")[0]*T.grad(CTC_LOSSs, data)
        train_data_d.append(data_d)
        data_m_s = theano.shared(np.zeros(data.get_value().shape).astype(np.float32))
        train_data_m_s.append(data_m_s)
        data_m = data_m_s*decay + (1-decay)*data_d**2
        train_data_m.append(data_m)
    #self.grad_test = theano.function([self.X, self.Y], train_data_d[-4])
    #self.data_d_print = theano.function([self.X,self.Y],train_data_d[0][0])
    #upd = [(d,d-learning_rate*d_d)for d,d_d in zip(self.train_data,train_data_d)]
    upd = [(d, d-learning_rate*d_d/T.sqrt(d_m+1e-4)) for d, d_d, d_m in zip(self.train_data, train_data_d, train_data_m)]
    upd1 = [(d_m_s, decay*d_m_s+(1-decay)*d_d**2) for d_m_s, d_d in zip(train_data_m_s, train_data_d)]
    upd += upd1
    #self.test = theano.function([self.X,self.Y],train_data_d[0])
    self.sgd_train = theano.function([self.X, self.Y, learning_rate, decay],
                                     [],
                                     updates=upd)
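Once compiled, the RMSProp-style update above would typically be driven from a training loop; a minimal sketch where `model`, `batches`, and the hyperparameter values are illustrative assumptions:

model.CTC_train()                      # build and compile the update function
learning_rate, decay = 1e-3, 0.9
for epoch in range(10):
    for X_batch, Y_batch in batches:   # hypothetical iterator over training pairs
        model.sgd_train(X_batch, Y_batch, learning_rate, decay)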