def _sample(self, n_samples):
    """Draw `n_samples` category indices for each batch entry.

    The logits are flattened to 2-D (last axis of size `n_categories`)
    before sampling, because `tf.multinomial` only accepts 2-D logits.

    Args:
        n_samples: Number of draws; a Python int or a scalar tensor.

    Returns:
        A tensor of dtype `self.dtype` with the sample axis first. When the
        logits are already 2-D the transposed draws are returned directly.
    """
    already_flat = self.logits.get_shape().ndims == 2
    if already_flat:
        flat_logits = self.logits
    else:
        flat_logits = tf.reshape(self.logits, [-1, self.n_categories])

    # tf.multinomial yields [rows, n_samples]; put the sample axis first.
    draws = tf.multinomial(flat_logits, n_samples)
    flat_draws = tf.cast(tf.transpose(draws), self.dtype)
    if already_flat:
        return flat_draws

    dynamic_shape = tf.concat([[n_samples], self.batch_shape], 0)
    result = tf.reshape(flat_draws, dynamic_shape)
    # Only a Python int gives a statically known sample dimension.
    leading = n_samples if isinstance(n_samples, int) else None
    static_shape = tf.TensorShape([leading]).concatenate(
        self.get_batch_shape())
    result.set_shape(static_shape)
    return result
python类multinomial()的实例源码
def _sample(self, n_samples):
    """Sample success counts by summing `n_experiments` two-class draws.

    Each scalar logit is expanded into a pair of log-probabilities
    (failure, success); `tf.multinomial` then draws 0/1 outcomes, which are
    summed over the experiment axis.

    Args:
        n_samples: Number of samples; a Python int or a scalar tensor.

    Returns:
        A tensor of dtype `self.dtype` with the sample axis first, followed
        by the batch shape.
    """
    n_trials = self.n_experiments
    flat_logits = self.logits
    if flat_logits.get_shape().ndims != 1:
        flat_logits = tf.reshape(flat_logits, [-1])

    # log(1 - sigmoid(x)) = -softplus(x) and log(sigmoid(x)) = x - softplus(x)
    log_failure = -tf.nn.softplus(flat_logits)
    log_success = flat_logits + log_failure
    pair_logits = tf.stack([log_failure, log_success], axis=-1)

    draws = tf.multinomial(pair_logits, n_samples * n_trials)
    draws = tf.transpose(draws)
    target_shape = tf.concat([[n_trials, n_samples], self.batch_shape], 0)
    # Sum the 0/1 outcomes over the leading experiment axis.
    counts = tf.reduce_sum(tf.reshape(draws, target_shape), axis=0)

    leading = n_samples if isinstance(n_samples, int) else None
    counts.set_shape(
        tf.TensorShape([leading]).concatenate(self.get_batch_shape()))
    return tf.cast(counts, self.dtype)
def _sample(self, n_samples):
    """Sample per-category counts over `n_experiments` categorical draws.

    Args:
        n_samples: Number of samples; a Python int or a scalar tensor.

    Returns:
        A tensor of dtype `self.dtype`: the one-hot encoded draws summed
        over the experiment axis (axis 1), so the last axis has size
        `n_categories`.
    """
    flat_logits = self.logits
    if flat_logits.get_shape().ndims != 2:
        flat_logits = tf.reshape(flat_logits, [-1, self.n_categories])

    total_draws = n_samples * self.n_experiments
    draws = tf.transpose(tf.multinomial(flat_logits, total_draws))
    dynamic_shape = tf.concat(
        [[n_samples, self.n_experiments], self.batch_shape], 0)
    indices = tf.reshape(draws, dynamic_shape)

    # Static dimensions are only known for Python ints.
    static_samples = n_samples if isinstance(n_samples, int) else None
    static_exps = (self.n_experiments
                   if isinstance(self.n_experiments, int) else None)
    static_shape = tf.TensorShape([static_samples, static_exps]).concatenate(
        self.get_batch_shape())
    indices.set_shape(static_shape)

    one_hot_draws = tf.one_hot(indices, self.n_categories, dtype=self.dtype)
    return tf.reduce_sum(one_hot_draws, axis=1)
def _sample(self, n_samples):
    """Draw `n_samples` one-hot encoded categories for each batch entry.

    Args:
        n_samples: Number of samples; a Python int or a scalar tensor.

    Returns:
        A one-hot tensor of dtype `self.dtype` whose last axis has size
        `n_categories` and whose first axis is the sample axis.
    """
    already_flat = self.logits.get_shape().ndims == 2
    if already_flat:
        flat_logits = self.logits
    else:
        flat_logits = tf.reshape(self.logits, [-1, self.n_categories])

    indices = tf.transpose(tf.multinomial(flat_logits, n_samples))
    if not already_flat:
        # Restore the batch shape and record what is statically known.
        dynamic_shape = tf.concat([[n_samples], self.batch_shape], 0)
        indices = tf.reshape(indices, dynamic_shape)
        leading = n_samples if isinstance(n_samples, int) else None
        indices.set_shape(
            tf.TensorShape([leading]).concatenate(self.get_batch_shape()))

    return tf.one_hot(indices, self.n_categories, dtype=self.dtype)
def sampleAction(self, states):
    """Choose an action for `states` using epsilon-greedy exploration.

    With probability `self.exploration` a uniformly random action index is
    returned. Otherwise the action scores are fetched from the session,
    turned into probabilities with a softmax, and one action is sampled.

    Args:
        states: Value fed for `self.states`; only the first row of the
            returned scores is used.

    Returns:
        The chosen action index.
    """
    # TODO: use this code piece when tf.multinomial gets better
    # sample action from current policy
    # actions = self.session.run(self.predicted_actions, {self.states: states})[0]
    # return actions[0]
    # temporary workaround
    def softmax(y):
        """Stable softmax over unnormalized log-probabilities, in float64."""
        # float64 keeps the sum within numpy's pvals tolerance, so no ad-hoc
        # offset is needed (an offset can push small entries below zero).
        y = np.asarray(y, dtype=np.float64)
        e = np.exp(y - np.amax(y))
        return e / np.sum(e)

    # epsilon-greedy exploration strategy
    if random.random() < self.exploration:
        return random.randint(0, self.num_actions - 1)

    action_scores = self.session.run(self.action_scores, {self.states: states})[0]
    action_probs = softmax(action_scores)
    # A single multinomial trial is one-hot; argmax recovers the index.
    return np.argmax(np.random.multinomial(1, action_probs))
def sampleAction(self, states):
    """Choose an action for `states` using epsilon-greedy exploration.

    With probability `self.exploration` a uniformly random action index is
    returned. Otherwise the action scores are fetched from the session,
    turned into probabilities with a softmax, and one action is sampled.

    Args:
        states: Value fed for `self.states`; only the first row of the
            returned scores is used.

    Returns:
        The chosen action index.
    """
    # TODO: use this code piece when tf.multinomial gets better
    # sample action from current policy
    # actions = self.session.run(self.predicted_actions, {self.states: states})[0]
    # return actions[0]
    # temporary workaround
    def softmax(y):
        """Stable softmax over unnormalized log-probabilities, in float64."""
        # float64 keeps the sum within numpy's pvals tolerance, so no ad-hoc
        # offset is needed (an offset can push small entries below zero).
        y = np.asarray(y, dtype=np.float64)
        e = np.exp(y - np.amax(y))
        return e / np.sum(e)

    # epsilon-greedy exploration strategy
    if random.random() < self.exploration:
        return random.randint(0, self.num_actions - 1)

    action_scores = self.session.run(self.action_scores, {self.states: states})[0]
    action_probs = softmax(action_scores)
    # A single multinomial trial is one-hot; argmax recovers the index.
    return np.argmax(np.random.multinomial(1, action_probs))
def __init__(self, name, inputs, conv_outputs, reward_scaling, config):
    """Build the value and policy heads on top of `conv_outputs`.

    Everything is created inside the variable scope `name`. Exposes
    `self.value`, `self.policy`, `self.log_policy` and `self.greedy_action`.

    Args:
        name: Variable scope name for this head.
        inputs: Object providing an `alive` tensor that multiplies the value.
        conv_outputs: Feature tensor fed to the shared hidden layer.
        reward_scaling: Object providing `unnormalize_output(value)`.
        config: Object providing `num_actions`.
    """
    with tf.variable_scope(name):
        hidden = tf.layers.dense(conv_outputs, 256, tf.nn.relu, name='hidden')

        # Value head: one unit, rescaled and multiplied by `inputs.alive`.
        raw_value = tf.layers.dense(hidden, 1)
        scaled_value = inputs.alive * reward_scaling.unnormalize_output(raw_value)
        self.value = tf.squeeze(scaled_value, axis=1, name='value')

        # Policy head: one logit per action.
        action_logits = tf.layers.dense(
            hidden, config.num_actions, name='actions')
        self.policy = tf.nn.softmax(action_logits, name='policy')
        self.log_policy = tf.nn.log_softmax(action_logits, name='log_policy')

        # NOTE: despite the name, this op samples from the policy.
        sampled = tf.multinomial(self.log_policy, num_samples=1)
        self.greedy_action = tf.squeeze(sampled, axis=1, name='greedy_action')
def build_forward(self, _input):
    """Build the actor head and sample one decision per time step.

    Sets `self.decision` ([batch_size, num_steps]) and `self.probs`
    ([batch_size, num_steps, probs_dim]).

    Args:
        _input: Tensor of shape [batch_size, num_steps, rnn_units].

    Raises:
        ValueError: If `self.net_type` is not 'simple'.
    """
    rnn_units = int(_input.get_shape()[2])
    features = tf.reshape(_input, [-1, rnn_units])  # [batch_size * num_steps, rnn_units]
    final_activation = 'softmax' if self.out_dim != 1 else 'sigmoid'

    if self.net_type != 'simple':
        raise ValueError('Do not support %s' % self.net_type)

    layer_specs = self.net_config if self.net_config is not None else []
    with tf.variable_scope('wider_actor'):
        for spec in layer_specs:
            units = spec.get('units')
            activation = spec.get('activation', 'relu')
            features = BasicModel.fc_layer(features, units, use_bias=True)
            features = BasicModel.activation(features, activation)
        # [batch_size * num_steps, out_dim]
        logits = BasicModel.fc_layer(features, self.out_dim, use_bias=True)

    probs = BasicModel.activation(logits, final_activation)
    probs_dim = self.out_dim
    if self.out_dim == 1:
        # A single sigmoid output becomes an explicit two-class distribution.
        probs = tf.concat([1 - probs, probs], axis=1)
        probs_dim = 2

    sampled = tf.multinomial(tf.log(probs), 1)  # [batch_size * num_steps, 1]
    self.decision = sampled
    self.decision = tf.reshape(self.decision, [-1, self.num_steps])
    self.probs = tf.reshape(probs, [-1, self.num_steps, probs_dim])
def _argmax_or_mcsearch(embedding, output_projection=None, update_embedding=True, mc_search=False):
    """Return a decoder loop function that embeds the previously chosen symbol.

    Args:
        embedding: Embedding matrix to look symbols up in.
        output_projection: Optional (weights, biases) pair applied to the
            previous output before a symbol is chosen.
        update_embedding: If False, gradients are stopped at the embedding.
        mc_search: Python bool or boolean tensor. When true the symbol is
            sampled with `tf.multinomial`; otherwise the argmax is taken.

    Returns:
        A function `(prev, i) -> embedded previous symbol`.
    """
    def _sampled_symbol(scores):
        # Draw one symbol per row and flatten [batch, 1] to [batch].
        return tf.reshape(tf.multinomial(scores, 1), [-1])

    def loop_function(prev, _):
        if output_projection is not None:
            weights, biases = output_projection[0], output_projection[1]
            prev = nn_ops.xw_plus_b(prev, weights, biases)

        if isinstance(mc_search, bool):
            # Static choice made once at graph-construction time.
            if mc_search:
                prev_symbol = _sampled_symbol(prev)
            else:
                prev_symbol = math_ops.argmax(prev, 1)
        else:
            # Tensor-valued flag: choose between the two at run time.
            prev_symbol = tf.cond(
                mc_search,
                lambda: _sampled_symbol(prev),
                lambda: tf.argmax(prev, 1))

        emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
        if update_embedding:
            return emb_prev
        # Keep the embedding out of the gradient path.
        return array_ops.stop_gradient(emb_prev)

    return loop_function
def sample(params, eps, dist='gauss'):
    """Sample from a distribution by transforming the noise `eps`.

    Args:
        params: Distribution parameters. `(mean, cov)` for 'gauss';
            `(means, covs, pi_logits)` for 'gm'. When `dist` contains 'bin'
            the last entry is a logits tensor, removed before the rest is
            unpacked.
        eps: Noise tensor that is scaled and shifted.
        dist: Name selecting the distribution by substring
            ('gauss', 'gm', optionally combined with 'bin').

    Returns:
        The sample; with 'bin', the sigmoid of the logits is concatenated
        along axis 1.

    Raises:
        NotImplementedError: If `dist` contains neither 'gauss' nor 'gm'.
    """
    with_binary = 'bin' in dist
    if with_binary:
        logits, params = params[-1], params[:-1]

    if 'gauss' in dist:
        mean, cov = params
        drawn = mean + tf.sqrt(cov) * eps
    elif 'gm' in dist:
        means, covs, pi_logits = params
        # Choose one mixture component per row.
        component = tf.multinomial(pi_logits, num_samples=1)
        batch_size = component.get_shape()[0]
        row_ids = tf.constant(
            list(range(batch_size)), dtype=tf.int64, shape=(batch_size, 1))
        # (row, component) index pairs for gather_nd.
        gather_idx = tf.concat([row_ids, component], axis=1)
        picked_means = tf.gather_nd(means, gather_idx)
        picked_covs = tf.gather_nd(covs, gather_idx)
        drawn = picked_means + tf.sqrt(picked_covs) * eps
    else:
        raise NotImplementedError

    if with_binary:
        drawn = tf.concat([drawn, tf.sigmoid(logits)], axis=1)
    return drawn
def sample_with_temperature(logits, temperature):
    """Pick classes by argmax or by temperature-scaled random sampling.

    Args:
        logits: a Tensor whose last axis indexes the classes.
        temperature: a float; 0.0 selects argmax, larger values sample from
            the logits divided by the temperature.

    Returns:
        a Tensor with one fewer dimension than logits.
    """
    if temperature == 0.0:
        return tf.argmax(logits, -1)

    assert temperature > 0.0
    # tf.multinomial needs 2-D input: collapse all leading axes.
    num_classes = shape_list(logits)[-1]
    scaled_2d = tf.reshape(logits, [-1, num_classes]) / temperature
    drawn = tf.multinomial(scaled_2d, 1)
    # Restore every axis except the class axis.
    rank = logits.get_shape().ndims
    leading_shape = shape_list(logits)[:rank - 1]
    return tf.reshape(drawn, leading_shape)
def sample_action(self, observation):
    r"""
    Samples an action from \pi_\theta(a|s)

    tf ops are eliminated on purpose here since this is a hot code path and
    we're optimizing for CPU usage...or maybe tf.multinomial is just slow in general.

    Using TF ops:
    sample_action_op = tf.squeeze(tf.nn.softmax(self.net.logits))
    action = tf.multinomial(sample_action_op)

    Args:
        observation: A single observation; it is wrapped in a list before
            being fed for `self.net.obs`.

    Returns:
        A one-element list holding the sampled action index.
    """
    # TODO: ensure this works when num_actions > 1
    # Despite the name, this holds raw logits until self.softmax is applied.
    action_probs = self.net.sess.run(
        self.net.logits,
        {self.net.obs: [observation]}
    )[0]
    action = np.random.choice(
        np.arange(len(action_probs)), p=self.softmax(action_probs))
    return [action]
def choose_action(self):
    """Build the op that picks an action for the configured mode.

    Returns:
        For 'discrete' mode, a scalar index sampled from `self.a_prob`.
        For 'continuous' mode, the first element of a sample from
        `self.action_normal_dist`, rescaled and clipped to `ACTION_BOUND`.
        For any other mode nothing is returned (None).
    """
    mode = self.config.mode
    if mode == 'discrete':
        # tf.multinomial expects log-probabilities, hence the tf.log.
        drawn = tf.multinomial(tf.log(self.a_prob), 1)
        return drawn[0][0]
    if mode == 'continuous':
        lower = self.config.ACTION_BOUND[0]
        scaled = self.action_normal_dist.sample(1) * self.config.ACTION_GAP + lower
        # Drop the leading sample axis added by sample(1).
        squeezed = tf.squeeze(scaled, axis=0)
        clipped = tf.clip_by_value(
            squeezed,
            self.config.ACTION_BOUND[0],
            self.config.ACTION_BOUND[1])
        return clipped[0]
def __call__(self, prev_output):
    """Sample the next input from the previous output with a temperature.

    Labels are drawn with probability proportional to
    exp(prev_output / temperature). `tf.multinomial` takes unnormalised
    log-probabilities, so the scaled output is passed in directly. This
    avoids the overflow/underflow of an explicit exp / normalise / log
    round trip while sampling from the same distribution.

    Args:
        prev_output (tf.Tensor): the output on which to apply the
            transformation, of size [batch_size, nb_labels]
    Return:
        tf.Ops: the one-hot encoding of the sampled labels, of size
            [batch_size, nb_labels]

    Side effects:
        Appends the sampled labels to `self.chosen_labels`.
    """
    # prev_output size: [batch_size, nb_labels]
    nb_labels = prev_output.get_shape().as_list()[-1]

    # TODO: Add option to control argmax (tf.argmax(prev_output, 1))
    scaled_logits = tf.div(prev_output, self.temperature)  # pi / t
    # scaled_logits size: [batch_size, nb_labels]
    label_draws = tf.multinomial(scaled_logits, 1)  # Draw 1 sample per row
    # label_draws size: [batch_size, 1]
    label_draws = tf.squeeze(label_draws, [1])
    # label_draws size: [batch_size,]
    self.chosen_labels.append(label_draws)
    next_input = tf.one_hot(label_draws, nb_labels)  # Reencode the next input vector
    # next_input size: [batch_size, nb_labels]
    return next_input
def sample(x):
    """Draw one class index per row of `x`.

    `x` is passed through `tf.log` first, so it is treated as (possibly
    unnormalised) probabilities. The result keeps a trailing axis of size 1.
    """
    log_x = tf.log(x)
    return tf.multinomial(log_x, 1)
def __init__(self, dim):
    """Create the distribution and compile its sampling function.

    Args:
        dim: Number of categories; the size of the last axis of the
            "weights" placeholder.
    """
    self._dim = dim
    weights_var = tf.placeholder(
        dtype=tf.float32,
        shape=(None, dim),
        name="weights"
    )
    # tf.multinomial expects unnormalised log-probabilities, so the weights
    # are logged first; the epsilon keeps log() finite for zero weights.
    # This matches how sample_sym feeds probabilities to tf.multinomial.
    self._f_sample = tensor_utils.compile_function(
        inputs=[weights_var],
        outputs=tf.multinomial(
            tf.log(weights_var + 1e-8), num_samples=1)[:, 0],
    )
def sample_sym(self, dist_info):
    """Symbolically sample one-hot vectors from `dist_info["prob"]`.

    Args:
        dist_info: Mapping whose "prob" entry holds the category
            probabilities, one row per sample.

    Returns:
        A float32 tensor with one one-hot row of length `self.dim` per
        input row.
    """
    log_probs = tf.log(dist_info["prob"] + 1e-8)
    drawn_ids = tf.multinomial(log_probs, num_samples=1)[:, 0]
    # Row i of the identity matrix is the one-hot encoding of category i.
    identity = np.eye(self.dim, dtype=np.float32)
    return tf.nn.embedding_lookup(identity, drawn_ids)
def random_category(self, batch_size, size, dtype):
    """Sample `batch_size` one-hot vectors uniformly over `size` categories.

    Args:
        batch_size: Number of rows to sample.
        size: Number of categories.
        dtype: dtype of the returned one-hot tensor.

    Returns:
        A [batch_size, size] one-hot tensor.
    """
    uniform = tf.ones([batch_size, size])*1./size
    # TINY guards the log; it is defined elsewhere in the file.
    log_uniform = tf.log(uniform + TINY)
    drawn = tf.multinomial(log_uniform, num_samples=1)[:, 0]
    return tf.one_hot(drawn, size, dtype=dtype)
def sample_and_embed(embedding, temperature, output_list=None,
                     output_projection=None):
    """Returns a callable (usable as a loop_fn for seq2seq) which takes a
    sample from a batch of outputs and embeds them. Optionally applies a
    projection first.

    Args:
        embedding: an embedding matrix to lookup symbols in.
        temperature: temperature to control the pointiness of the softmax.
        output_list (Optional): a list in which to collect the samples.
            Default None means don't collect them at all.
        output_projection (Optional): tuple (weight, biases) used to project
            outputs. If None (default), no projection is performed.

    Returns:
        A function `(prev, i) -> embedded sample` suitable as a loop function.
    """
    def _sample_embed(prev, _):
        var = _maybe_project(prev, output_projection)
        var /= temperature
        next_ = tf.multinomial(var, 1)
        # Drop only the num_samples axis. An unqualified squeeze would also
        # collapse the batch axis when the batch size is 1.
        next_ = tf.squeeze(next_, [1])
        # maybe store it
        if output_list is not None:
            output_list.append(next_)
        # look up the embedding
        next_ = tf.nn.embedding_lookup(
            embedding, next_)
        return next_

    return _sample_embed
def sample(self, dist_info):
    """Sample one-hot vectors from the probabilities in `dist_info["prob"]`.

    Args:
        dist_info: Mapping whose "prob" entry holds the category
            probabilities, one row per sample.

    Returns:
        A float32 tensor with one one-hot row of length `self.dim` per
        input row.
    """
    # TINY guards the log; it is defined elsewhere in the file.
    log_prob = tf.log(dist_info["prob"] + TINY)
    drawn_ids = tf.multinomial(log_prob, num_samples=1)[:, 0]
    # Row i of the identity matrix is the one-hot encoding of category i.
    identity = tf.constant(np.eye(self.dim, dtype=np.float32))
    return tf.nn.embedding_lookup(identity, drawn_ids)
def multinomial_3d(x):
    """Samples from a multinomial distribution from 3D Tensor.

    The first two axes are merged so `tf.multinomial` sees 2-D input, then
    restored on the result.

    Args:
        x: Tensor with shape (batch_size, timesteps, classes)

    Returns:
        Tensor with shape (batch_size, timesteps), sampled from `classes`.
    """
    batch = tf.shape(x)[0]
    steps = tf.shape(x)[1]
    merged = tf.reshape(x, (batch * steps, -1))
    drawn = tf.multinomial(merged, 1)
    return tf.reshape(drawn, (batch, steps))
def multinomial_2d(x):
    """Samples from a multinomial distribution from 2D Tensor.

    Args:
        x: Tensor with shape (batch_size, classes)

    Returns:
        Tensor with shape (batch_size), sampled from `classes`.
    """
    batch = tf.shape(x)[0]
    drawn = tf.multinomial(x, 1)
    # Flatten the trailing num_samples axis of size 1.
    return tf.reshape(drawn, (batch,))
def categorical_sample(logits, d, exploration=True):
    """Pick one category per row of `logits` and return it one-hot encoded.

    Args:
        logits: 2-D tensor of unnormalised log-probabilities, one row per
            example.
        d: Depth of the one-hot encoding (number of categories).
        exploration: If truthy, sample from the distribution; otherwise
            take the argmax. Any truthy/falsy Python value is accepted.

    Returns:
        A one-hot tensor with one row of depth `d` per input row.
    """
    # Subtract the row maximum before sampling (stabilisation shift).
    shifted = logits - tf.reduce_max(logits, [1], keep_dims=True)
    if exploration:
        # [batch, 1] -> [batch]
        indices = tf.squeeze(tf.multinomial(shifted, 1), [1])
    else:
        indices = tf.argmax(shifted, 1)
    return tf.one_hot(indices, d)
def __init__(self, dim):
    """Create the distribution and compile its sampling function.

    Args:
        dim: Number of categories; the size of the last axis of the
            "weights" placeholder.
    """
    self._dim = dim
    weights_var = tf.placeholder(
        dtype=tf.float32, shape=(None, dim), name="weights")
    # The epsilon keeps log() finite when a weight is exactly zero.
    log_weights = tf.log(weights_var + 1e-8)
    drawn_ids = tf.multinomial(log_weights, num_samples=1)[:, 0]
    self._f_sample = tensor_utils.compile_function(
        inputs=[weights_var],
        outputs=drawn_ids,
    )
def sample_sym(self, dist_info):
    """Symbolically sample one-hot vectors from `dist_info["prob"]`.

    Args:
        dist_info: Mapping whose "prob" entry holds the category
            probabilities, one row per sample.

    Returns:
        A float32 tensor with one one-hot row of length `self.dim` per
        input row.
    """
    log_probs = tf.log(dist_info["prob"] + 1e-8)
    drawn_ids = tf.multinomial(log_probs, num_samples=1)[:, 0]
    # Row i of the identity matrix is the one-hot encoding of category i.
    identity = np.eye(self.dim, dtype=np.float32)
    return tf.nn.embedding_lookup(identity, drawn_ids)
def rollout(self, doing_eval=False):
    """Run one episode, collecting observations, actions and rewards.

    Args:
        doing_eval: Forwarded to `self.sample_action_given`.

    Returns:
        A tuple `(observations, actions, rewards)` of equal-length lists,
        one entry per environment step.
    """
    observations, actions, rewards = [], [], []
    observation = self.env.reset()
    done = False
    while not done:
        observations.append(observation)
        action = self.sample_action_given(observation, doing_eval)
        # Sanity check kept from the original code; it presumably guards
        # against an out-of-range index from the sampler. TODO confirm.
        assert action != 5, "FAIL! (multinomial logits sampling bug?"
        observation, reward, done, _ = self.env.step(action)
        actions.append(action)
        rewards.append(reward)
    if VERBOSE_DEBUG:
        # Call form with one argument prints the same text on Python 2 and 3.
        print("rollout: actions=%s" % (actions,))
    return observations, actions, rewards
def __init__(self, n_options, sess):
    """Store the session and build an op that picks an option uniformly.

    Args:
        n_options: Number of options to choose between.
        sess: Session object, stored as `self.sess`.
    """
    self.n_options = n_options
    self.sess = sess
    # Equal logits for every option give a uniform distribution.
    equal_logits = [self.n_options * [1.]]
    drawn = tf.multinomial(equal_logits, 1)
    self.picker = drawn[0][0]
def corrupt(tensor, corruption_level=0.05):
    """Uses the masking noise algorithm to mask corruption_level proportion
    of the input.

    :param tensor: A tensor whose values are to be corrupted.
    :param corruption_level: A float in [0, 1] specifying the probability to corrupt each value.
    :return: The corrupted tensor.
    """
    total_samples = tf.reduce_prod(tf.shape(tensor))
    # Class 0 (drawn with probability corruption_level) zeroes the value;
    # class 1 keeps it, so the drawn indices double as a 0/1 mask.
    corruption_matrix = tf.multinomial(tf.log([[corruption_level, 1 - corruption_level]]), total_samples)
    corruption_matrix = tf.cast(tf.reshape(corruption_matrix, shape=tf.shape(tensor)), dtype=tf.float32)
    # tf.mul was removed in TensorFlow 1.0; tf.multiply is its replacement.
    return tf.multiply(tensor, corruption_matrix)
def _create_model(self):
    """Build the RNN, the next-token loss and the sampling op.

    Returns:
        A tuple `(loss, sample, in_state, out_state)` taken from the
        attributes of the same names.
    """
    depth = len(self.vocab)+1
    self._create_rnn()

    with tf.name_scope('loss'):
        self.logits = tf.contrib.layers.fully_connected(
            inputs=self.output, num_outputs=depth, activation_fn=None)
        self.labels = tf.one_hot(self.seq, depth=depth)
        # Position t's logits are scored against the label at position t + 1.
        per_step_loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=self.labels[:, 1:], logits=self.logits[:, :-1])
        self.loss = tf.reduce_mean(per_step_loss, name='loss')

    with tf.name_scope('sample'):
        # Sample from the temperature-scaled logits of the last step.
        last_step_logits = self.logits[:, -1] / self.temp
        self.sample = tf.multinomial(last_step_logits, 1)[:, 0]

    return self.loss, self.sample, self.in_state, self.out_state
def _compute_specific(self, predicted, targets):
    """Score uniformly random guesses against `targets`.

    Args:
        predicted: Unused.
        targets: Target class indices; the size of its first axis sets the
            number of random guesses drawn.

    Returns:
        The fraction of random guesses that equal the targets.
    """
    # Equal logits for every class give equal probabilities.
    equal_logits = tf.log([[10.] * self._num_classes])
    n_guesses = tf.shape(targets)[0]
    guesses = tf.multinomial(logits=equal_logits, num_samples=n_guesses)
    # Output prediction as ratio of matches
    matches = tf.equal(x=guesses, y=tf.cast(targets, tf.int64))
    return tf.reduce_mean(tf.cast(matches, tf.float32))