import numpy as np
import tensorflow as tf
from scipy.linalg import orth

def call(self, step_inputs, state, scope=None, initialization='gaussian'):
    """
    Make one step of the ISAN transition.

    Args:
      step_inputs: one-hot encoded inputs, shape bs x n
      state: previous hidden state, shape bs x d
      scope: current scope
      initialization: how to initialize the transition matrices:
        'orthogonal': orthogonalize Gaussian matrices (usually speeds up training)
        'gaussian': sample Gaussian matrices with a sensible scale
    """
    d = self._num_units
    n = step_inputs.shape[1].value
    if initialization == 'orthogonal':
        # Stack n orthogonal d x d matrices, one per input symbol.
        wx_ndd_init = np.zeros((n, d * d), dtype=np.float32)
        for i in range(n):
            wx_ndd_init[i, :] = orth(np.random.randn(d, d)).astype(np.float32).ravel()
        wx_ndd_initializer = tf.constant_initializer(wx_ndd_init)
    elif initialization == 'gaussian':
        wx_ndd_initializer = tf.random_normal_initializer(stddev=1.0 / np.sqrt(d))
    else:
        raise ValueError('Unknown init type: %s' % initialization)
    wx_ndd = tf.get_variable('Wx', shape=[n, d * d],
                             initializer=wx_ndd_initializer)
    bx_nd = tf.get_variable('bx', shape=[n, d],
                            initializer=tf.zeros_initializer())
    # Multiplication with a 1-hot is just row selection.
    # As of Jan '17 this is faster than doing gather.
    Wx_bdd = tf.reshape(tf.matmul(step_inputs, wx_ndd), [-1, d, d])
    bx_bd = tf.reshape(tf.matmul(step_inputs, bx_nd), [-1, 1, d])
    # Reshape the state so that matmul multiplies different matrices
    # for each batch element.
    single_state = tf.reshape(state, [-1, 1, d])
    new_state = tf.reshape(tf.matmul(single_state, Wx_bdd) + bx_bd, [-1, d])
    return new_state, new_state
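
The row-selection trick noted in the comments is easy to verify outside TensorFlow. Below is a minimal NumPy sketch (shapes and index values are illustrative, not from the original) showing that multiplying by a one-hot matrix equals a gather, and how the reshape gives each batch element its own transition matrix:

import numpy as np

bs, n, d = 2, 4, 3
wx_ndd = np.random.randn(n, d * d).astype(np.float32)
x = np.eye(n, dtype=np.float32)[[1, 3]]  # one-hot rows for symbols 1 and 3, shape bs x n

# Multiplying by a one-hot matrix picks out one row per batch element...
Wx_bdd = (x @ wx_ndd).reshape(bs, d, d)
# ...which is exactly a gather on the first axis.
assert np.allclose(Wx_bdd, wx_ndd[[1, 3]].reshape(bs, d, d))

# Batched transition: each batch element is multiplied by its own d x d matrix.
state = np.random.randn(bs, d).astype(np.float32)
new_state = (state.reshape(bs, 1, d) @ Wx_bdd).reshape(bs, d)
print(new_state.shape)  # (2, 3)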
def generate(self, type="2d"):
if type == "2d":
M = np.random.rand(self.ndims, self.ndims)
print (M)
M = sLA.orth(M)
print (M)
S = np.dot(np.diag([0, .25]), np.random.randn(self.ndims, self.l))
print ("S.shape", S.shape)
print (S)
A = np.dot(M, S)
print ("A.shape", A.shape)
# print A
return(A, S, M)
elif type == "close":
S = 2 * (np.random.rand(self.ndims, self.l) - 0.5)
A = S
print (A.shape)
        # MATLAB original: A(2:end,:) = A(2:end,:) + A(1:end-1, :)/2;
        A[1:, :] = A[1:, :] + A[:-1, :] / 2.
        return (A, S, np.zeros((1, 1)))
elif type == "noisysinewave":
t = np.linspace(0, 2 * np.pi, self.l)
sine = np.sin(t * 10)
# sine = 1.2 * (np.random.rand(1, self.l) - 0.5)
# nu = 0.1 * (np.random.rand(1, self.l) - 0.5)
# sine = 2.3 * np.random.randn(1, self.l)
nu = 0.7 * np.random.randn(1, self.l)
c1 = (2.7 * sine) + (2 * nu)
c2 = (1.1 * sine) + (1.2 * nu)
A = np.vstack((c1, c2))
print (A.shape)
# A(2:end,:) = A(2:end,:) + A(1:end-1, :)/2;
# A[1:-1,:] = A[1:-1,:] + A[0:-2, :]/2.
return (A, np.zeros((2, self.l)), np.zeros((1,1)))
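
A quick driver for generate; DataGen is a hypothetical stand-in carrying the two attributes the method reads, and this assumes generate is available at module level as shown above:

import numpy as np

class DataGen:
    """Hypothetical holder for the attributes generate() expects."""
    def __init__(self, ndims=2, l=500):
        self.ndims = ndims  # number of observed dimensions
        self.l = l          # number of samples

DataGen.generate = generate  # bind the method above to the stand-in class

A, S, M = DataGen().generate(type="2d")  # observations, sources, mixing matrix
print(A.shape, S.shape, M.shape)         # (2, 500) (2, 500) (2, 2)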
import numpy as np
from scipy.linalg import orth

# Note: get_recentered_matrix, get_total_dcov, and
# get_total_dcov_randomlysampled are helpers defined elsewhere in the module.

def initialization(Xs, u_0s, results):
    # results is a dictionary produced by preprocessing.
    # Initialize U, U_orth, and the dcovs;
    # U_orth keeps track of the orthogonal space of U.

    ### for the first dim, initialize u with either user input or randomly
    num_datasets = results['num_datasets']
    u = []
    for iset in range(num_datasets):
        num_vars = Xs[iset].shape[0]
        if u_0s and u_0s[iset].shape[0] == num_vars:  # user supplied initial weights for the first dim
            u.append(u_0s[iset][:, [0]])  # first column, kept as a num_vars x 1 column vector
        else:
            u.append(orth(np.random.randn(num_vars, 1)))

    ### get initial recentered matrices for each dataset based on u
    R = []
    if results['num_stoch_batch_samples'] == 0:  # only for full gradient descent
        for iset in range(num_datasets):
            R.append(get_recentered_matrix(u[iset], Xs[iset]))
        total_dcov = get_total_dcov(R, results['D_given'])
        total_dcov_old = total_dcov * 0.5  # set the old value to half so the first iteration passes the threshold

    ### stochastic gradient descent initialization
    momented_gradf = []
    stoch_learning_rate = 1  # initial learning rate for SGD
    if results['num_stoch_batch_samples'] > 0:
        for iset in range(num_datasets):
            momented_gradf.append(np.zeros(u[iset].shape))
        total_dcov = get_total_dcov_randomlysampled(u, Xs, results['D_given'], results)
        total_dcov_old = total_dcov * 0.5

    return u, momented_gradf, R, total_dcov, total_dcov_old, stoch_learning_rate, results
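
The random branch relies on scipy's orth returning a unit-norm column when given a num_vars x 1 input; a self-contained check (the sizes here are illustrative):

import numpy as np
from scipy.linalg import orth

u0 = orth(np.random.randn(7, 1))  # orthonormal basis of the 1-D column space
print(u0.shape)                   # (7, 1)
print(np.linalg.norm(u0))         # 1.0 up to floating-point error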