def init_params(options, W):
    params = OrderedDict()
    # W is initialized by the pretrained word embedding
    params['Wemb'] = W.astype(config.floatX)
    # otherwise, W will be initialized randomly
    # n_words = options['n_words']
    # n_x = options['n_x']
    # params['Wemb'] = uniform_weight(n_words, n_x)
    length = len(options['filter_shapes'])
    for idx in range(length):
        params = param_init_encoder(options['filter_shapes'][idx], params,
                                    prefix=_p('cnn_encoder', idx))
    n_h = options['feature_maps'] * length
    params['Wy'] = uniform_weight(n_h, options['n_y'])
    params['by'] = zero_bias(options['n_y'])
    return params
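# A minimal sketch, assuming the usual Theano-tutorial convention: the numpy
# parameter dict returned by init_params is mirrored with shared variables
# before building the graph (`init_tparams` is an assumed name, not shown in
# the original snippets).
from collections import OrderedDict
import theano

def init_tparams(params):
    tparams = OrderedDict()
    for name, value in params.items():
        # keep the dict key as the shared variable's Theano name
        tparams[name] = theano.shared(value, name=name)
    return tparams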
def init_params(options, W):
    n_h = options['n_h']
    n_y = options['n_y']
    params = OrderedDict()
    # W is initialized by the pretrained word embedding
    params['Wemb'] = W.astype(config.floatX)
    # otherwise, W will be initialized randomly
    # n_words = options['n_words']
    # n_x = options['n_x']
    # params['Wemb'] = uniform_weight(n_words, n_x)
    # bidirectional GRU
    params = param_init_encoder(options, params, prefix="gru_encoder")
    params = param_init_encoder(options, params, prefix="gru_encoder_rev")
    params['Wy'] = uniform_weight(2 * n_h, n_y)
    params['by'] = zero_bias(n_y)
    return params
def init_params(options, W):
    n_h = options['n_h']
    n_y = options['n_y']
    params = OrderedDict()
    # W is initialized by the pretrained word embedding
    params['Wemb'] = W.astype(config.floatX)
    # otherwise, W will be initialized randomly
    # n_words = options['n_words']
    # n_x = options['n_x']
    # params['Wemb'] = uniform_weight(n_words, n_x)
    # bidirectional LSTM
    params = param_init_encoder(options, params, prefix="lstm_encoder")
    params = param_init_encoder(options, params, prefix="lstm_encoder_rev")
    params['Wy'] = uniform_weight(2 * n_h, n_y)
    params['by'] = zero_bias(n_y)
    return params
def load_wemb(params, vocab):
    wemb = pkl.load(open(prm.wordemb_path, 'rb'))
    dim_emb_orig = wemb.values()[0].shape[0]
    # start from small random values; rows of known words are overwritten below
    W = 0.01 * np.random.randn(prm.n_words, dim_emb_orig).astype(config.floatX)
    for word, pos in vocab.items():
        if word in wemb:
            W[pos, :] = wemb[word]
    if prm.dim_emb < dim_emb_orig:
        # reduce the embedding dimensionality with whitened PCA
        pca = PCA(n_components=prm.dim_emb, copy=False, whiten=True)
        W = pca.fit_transform(W)
    params['W'] = W
    return params
def _causal_effect(
        hparams, mu1, mu1s_, mu2, mu2s_, tau_cmmn, obs1, obs2, Normal, floatX):
    u"""Distribution of observations."""
    if hparams['causality'] == [1, 2]:
        # ---- Model 1: x1 -> x2 ----
        x1s = obs1(mu=mu1 + mu1s_)
        b = Normal('b', mu=np.float32(0.),
                   tau=np.float32(1 / tau_cmmn[1]), dtype=floatX)
        x2s = obs2(mu=mu2 + mu2s_ + b * (x1s - mu1 - mu1s_)) \
            if hparams['subtract_mu_reg'] else \
            obs2(mu=mu2 + mu2s_ + b * x1s)
    else:
        # ---- Model 2: x2 -> x1 ----
        x2s = obs2(mu=mu2 + mu2s_)
        b = Normal('b', mu=np.float32(0.),
                   tau=np.float32(1 / tau_cmmn[0]), dtype=floatX)
        x1s = obs1(mu=mu1 + mu1s_ + b * (x2s - mu2 - mu2s_)) \
            if hparams['subtract_mu_reg'] else \
            obs1(mu=mu1 + mu1s_ + b * x2s)
    return x1s, x2s, b
def __init__(self, feature_count=None, hidden_unit_count=None, category_count=None, archive=None):
    if archive is None and (feature_count is None or hidden_unit_count is None or category_count is None):
        raise ValueError(
            "If archive is not passed in, an " + Parameters.Globals.__name__ +
            " object needs all other constructor arguments to be integers.")
    if archive is None:
        self.embedding_weights = theano.shared(
            (0.01 * np.random.rand(feature_count, hidden_unit_count)).astype(config.floatX),
            self.embedding_weights_literal)  # formerly 'Wemb'
        self.classifier_weights = theano.shared(
            0.01 * np.random.randn(hidden_unit_count, category_count).astype(config.floatX),
            self.classifier_weights_literal)  # formerly 'U'
        self.classifier_bias = theano.shared(
            np.zeros((category_count,)).astype(config.floatX),
            self.classifier_bias_literal)  # formerly 'b'
    else:
        self.load_values_from_dict(archive)
def __init__(self, hidden_unit_count=None, archive=None):
    if archive is None and hidden_unit_count is None:
        raise ValueError(
            "If archive is not passed in, an " + Parameters.LSTM.__name__ +
            " object needs hidden_unit_count argument to be an integer.")
    if archive is None:
        gen__r_o_v = generate_random_orthogonal_vectors
        # four concatenated blocks, one per LSTM gate pre-activation
        self.input_weights = theano.shared(
            np.concatenate([gen__r_o_v(hidden_unit_count),
                            gen__r_o_v(hidden_unit_count),
                            gen__r_o_v(hidden_unit_count),
                            gen__r_o_v(hidden_unit_count)], axis=1),
            self.input_weights_literal)  # formerly lstm_W
        self.hidden_weights = theano.shared(
            np.concatenate([gen__r_o_v(hidden_unit_count),
                            gen__r_o_v(hidden_unit_count),
                            gen__r_o_v(hidden_unit_count),
                            gen__r_o_v(hidden_unit_count)], axis=1),
            self.hidden_weights_literal)  # formerly lstm_U
        self.bias = theano.shared(
            np.zeros((4 * hidden_unit_count,)).astype(config.floatX),
            self.bias_literal)  # formerly lstm_b
    else:
        self.load_values_from_dict(archive)
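# generate_random_orthogonal_vectors is not shown above; a plausible sketch,
# assuming the common Theano-LSTM "ortho_weight" recipe (an orthogonal factor
# taken from the SVD of a random square matrix):
import numpy as np
from theano import config

def generate_random_orthogonal_vectors(ndim):
    # the left singular vectors of a random matrix form an orthonormal basis
    u, _, _ = np.linalg.svd(np.random.randn(ndim, ndim))
    return u.astype(config.floatX)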
def read(self):
    # Check if there are changes to read
    filehaschanged = os.stat(self.ymlfile).st_mtime != self.lastmodified
    # Update lastmodified timestamp
    if filehaschanged:
        self.lastmodified = os.stat(self.ymlfile).st_mtime
    else:
        return
    # Read from file
    with open(self.ymlfile, 'r') as f:
        update = yaml.load(f)
    # Update switches
    for switchname, switchvar in self.switches.items():
        # Fetch
        if switchname in update.keys():
            # Check if the update needs to be eval-ed
            if isinstance(update[switchname], str) and update[switchname].startswith('np.'):
                switchvarval = eval(update[switchname])
            else:
                switchvarval = getattr(np, config.floatX)(update[switchname])
            # Set switch variable
            switchvar.set_value(switchvarval)
    return
def calculate_cost(test_model, dataset, options):
    batchSize = options['batchSize']
    useTime = options['useTime']
    costSum = 0.0
    dataCount = 0
    n_batches = int(np.ceil(float(len(dataset[0])) / float(batchSize)))
    for index in xrange(n_batches):
        batchX = dataset[0][index*batchSize:(index+1)*batchSize]
        if useTime:
            batchT = dataset[2][index*batchSize:(index+1)*batchSize]
            x, t, lengths = padMatrixWithTime(batchX, batchT, options)
            y = np.array(dataset[1][index*batchSize:(index+1)*batchSize]).astype(config.floatX)
            scores = test_model(x, y, t, lengths)
        else:
            x, lengths = padMatrixWithoutTime(batchX, options)
            y = np.array(dataset[1][index*batchSize:(index+1)*batchSize]).astype(config.floatX)
            scores = test_model(x, y, lengths)
        costSum += scores * len(batchX)
        dataCount += len(batchX)
    return costSum / dataCount
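# padMatrixWithoutTime is not shown above; a minimal sketch under the
# assumption that each sequence is a list of visits and each visit a list of
# active code indices (the helper name and data layout are assumptions):
import numpy as np
from theano import config

def pad_matrix_without_time(seqs, options):
    # pack variable-length sequences into a zero-padded
    # (max_len, n_samples, inputDimSize) multi-hot tensor
    lengths = np.array([len(seq) for seq in seqs]).astype('int32')
    n_samples = len(seqs)
    max_len = np.max(lengths)
    x = np.zeros((max_len, n_samples, options['inputDimSize'])).astype(config.floatX)
    for idx, seq in enumerate(seqs):
        for t, visit in enumerate(seq):
            x[t, idx, visit] = 1.
    return x, lengths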
def test_select_distinct(self):
    """
    Tests that MultinomialWOReplacementFromUniform always selects distinct elements
    """
    p = tensor.fmatrix()
    u = tensor.fvector()
    n = tensor.iscalar()
    m = multinomial.MultinomialWOReplacementFromUniform('auto')(p, u, n)
    f = function([p, u, n], m, allow_input_downcast=True)
    n_elements = 1000
    all_indices = range(n_elements)
    numpy.random.seed(12345)
    for i in [5, 10, 50, 100, 500, n_elements]:
        uni = numpy.random.rand(i).astype(config.floatX)
        pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        res = f(pvals, uni, i)
        res = numpy.squeeze(res)
        assert len(res) == i, res
        assert numpy.all(numpy.in1d(numpy.unique(res), all_indices)), res
def test_select_distinct(self):
    """
    Tests that multinomial_wo_replacement always selects distinct elements
    """
    th_rng = RandomStreams(12345)
    p = tensor.fmatrix()
    n = tensor.iscalar()
    m = th_rng.multinomial_wo_replacement(pvals=p, n=n)
    f = function([p, n], m, allow_input_downcast=True)
    n_elements = 1000
    all_indices = range(n_elements)
    numpy.random.seed(12345)
    for i in [5, 10, 50, 100, 500, n_elements]:
        pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        res = f(pvals, i)
        res = numpy.squeeze(res)
        assert len(res) == i
        assert numpy.all(numpy.in1d(numpy.unique(res), all_indices)), res
def numpy_floatX(data):
    return numpy.asarray(data, dtype=floatX)
def get_variable(name, shape, initializer=None, dtype=floatX):
    if initializer is not None:
        param = initializer(shape, dtype)
    else:
        param = random_normal_initializer()(shape, dtype)
    return theano.shared(value=param, name=name, borrow=True)
def numpy_floatX(data):
    return np.asarray(data, dtype=config.floatX)
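# Example use: Python floats are float64, so casting scalars through
# numpy_floatX keeps shared variables at the configured precision and avoids
# float64 upcasting in the graph (as with the use_noise switch further below):
import theano

use_noise = theano.shared(numpy_floatX(0.))
learning_rate = theano.shared(numpy_floatX(0.01), name='lr')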
def get_random_weight(dim1, dim2, left=-0.1, right=0.1):
    return np.random.uniform(left, right, (dim1, dim2)).astype(config.floatX)
def init_params(options):
    params = OrderedDict()
    np.random.seed(0)
    inputDimSize = options['inputDimSize']
    numAncestors = options['numAncestors']
    embDimSize = options['embDimSize']
    hiddenDimSize = options['hiddenDimSize']  # hidden layer does not need an extra space
    attentionDimSize = options['attentionDimSize']
    numClass = options['numClass']
    params['W_emb'] = get_random_weight(inputDimSize + numAncestors, embDimSize)
    if len(options['embFile']) > 0:
        params['W_emb'] = load_embedding(options)
        options['embDimSize'] = params['W_emb'].shape[1]
        embDimSize = options['embDimSize']
    params['W_attention'] = get_random_weight(embDimSize * 2, attentionDimSize)
    params['b_attention'] = np.zeros(attentionDimSize).astype(config.floatX)
    params['v_attention'] = np.random.uniform(-0.1, 0.1, attentionDimSize).astype(config.floatX)
    params['W_gru'] = get_random_weight(embDimSize, 3 * hiddenDimSize)
    params['U_gru'] = get_random_weight(hiddenDimSize, 3 * hiddenDimSize)
    params['b_gru'] = np.zeros(3 * hiddenDimSize).astype(config.floatX)
    params['W_output'] = get_random_weight(hiddenDimSize, numClass)
    params['b_output'] = np.zeros(numClass).astype(config.floatX)
    return params
def build_model(tparams, leavesList, ancestorsList, options):
    dropoutRate = options['dropoutRate']
    trng = RandomStreams(123)
    use_noise = theano.shared(numpy_floatX(0.))
    x = T.tensor3('x', dtype=config.floatX)
    y = T.tensor3('y', dtype=config.floatX)
    mask = T.matrix('mask', dtype=config.floatX)
    lengths = T.vector('lengths', dtype=config.floatX)
    n_timesteps = x.shape[0]
    n_samples = x.shape[1]
    embList = []
    for leaves, ancestors in zip(leavesList, ancestorsList):
        tempAttention = generate_attention(tparams, leaves, ancestors)
        tempEmb = (tparams['W_emb'][ancestors] * tempAttention[:, :, None]).sum(axis=1)
        embList.append(tempEmb)
    emb = T.concatenate(embList, axis=0)
    x_emb = T.tanh(T.dot(x, emb))
    hidden = gru_layer(tparams, x_emb, options)
    hidden = dropout_layer(hidden, use_noise, trng, dropoutRate)
    y_hat = softmax_layer(tparams, hidden) * mask[:, :, None]
    logEps = 1e-8
    cross_entropy = -(y * T.log(y_hat + logEps) + (1. - y) * T.log(1. - y_hat + logEps))
    output_loglikelihood = cross_entropy.sum(axis=2).sum(axis=0) / lengths
    cost_noreg = T.mean(output_loglikelihood)
    if options['L2'] > 0.:
        cost = cost_noreg + options['L2'] * ((tparams['W_output'] ** 2).sum() +
                                             (tparams['W_attention'] ** 2).sum() +
                                             (tparams['v_attention'] ** 2).sum())
    else:
        # without this branch, `cost` would be undefined when L2 == 0
        cost = cost_noreg
    return use_noise, x, y, mask, lengths, cost, cost_noreg, y_hat
def init_params(options):
    params = OrderedDict()
    inputSize = options['inputSize']
    dimensionSize = options['dimensionSize']
    rng = np.random.RandomState(1234)
    params['w'] = np.asarray(rng.uniform(low=-0.1, high=0.1, size=(inputSize, dimensionSize)),
                             dtype=theano.config.floatX)
    rng = np.random.RandomState(12345)
    params['w_tilde'] = np.asarray(rng.uniform(low=-0.1, high=0.1, size=(inputSize, dimensionSize)),
                                   dtype=theano.config.floatX)
    params['b'] = np.zeros(inputSize).astype(theano.config.floatX)
    params['b_tilde'] = np.zeros(inputSize).astype(theano.config.floatX)
    return params
def build_model(tparams, options):
    weightVector = T.vector('weightVector', dtype=theano.config.floatX)
    iVector = T.vector('iVector', dtype='int32')
    jVector = T.vector('jVector', dtype='int32')
    # GloVe-style weighted least-squares cost over (i, j) co-occurrence pairs
    cost = weightVector * (((tparams['w'][iVector] * tparams['w_tilde'][jVector]).sum(axis=1) +
                            tparams['b'][iVector] + tparams['b_tilde'][jVector] -
                            T.log(weightVector)) ** 2)
    return weightVector, iVector, jVector, cost.sum()
def load_data(infile):
    cooccurMap = pickle.load(open(infile, 'rb'))
    I = []
    J = []
    Weight = []
    for key, value in cooccurMap.iteritems():
        I.append(key[0])
        J.append(key[1])
        Weight.append(weightFunction(value))
    shared_I = theano.shared(np.asarray(I, dtype='int32'), borrow=True)
    shared_J = theano.shared(np.asarray(J, dtype='int32'), borrow=True)
    shared_Weight = theano.shared(np.asarray(Weight, dtype=theano.config.floatX), borrow=True)
    return shared_I, shared_J, shared_Weight
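# A hedged sketch of wiring init_params, build_model, and load_data into a
# minibatch training function (the init_tparams helper from the earlier
# sketch, the batch size, and plain SGD updates are assumptions, not part of
# the original code):
import theano
import theano.tensor as T

tparams = init_tparams(init_params(options))
weightVector, iVector, jVector, cost = build_model(tparams, options)
grads = T.grad(cost, wrt=list(tparams.values()))
lr = T.scalar('lr', dtype=theano.config.floatX)
updates = [(p, p - lr * g) for p, g in zip(tparams.values(), grads)]

index = T.lscalar('index')
batchSize = 1000  # assumed minibatch size
shared_I, shared_J, shared_Weight = load_data(infile)
train = theano.function(
    [index, lr], cost, updates=updates,
    givens={weightVector: shared_Weight[index * batchSize:(index + 1) * batchSize],
            iVector: shared_I[index * batchSize:(index + 1) * batchSize],
            jVector: shared_J[index * batchSize:(index + 1) * batchSize]})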