def categorical_sample(logits, d, exploration=True):
    """Pick one category per row of ``logits`` and return it one-hot encoded.

    Args:
        logits: Tensor of unnormalized log-probabilities. Every reduction,
            sample and argmax below runs over axis 1, so it is expected to
            be 2-D (``tf.multinomial`` requires ``[batch, num_classes]``).
        d: Depth handed to ``tf.one_hot``. Presumably the number of
            categories, i.e. ``logits.shape[1]`` -- confirm against callers.
        exploration: Truthy -> draw a random index per row with
            ``tf.multinomial``; falsy -> take the highest-scoring index per
            row. Must be a plain Python value: the branch is chosen while
            the graph is being built, not per session run.

    Returns:
        ``tf.one_hot`` (depth ``d``) of the chosen index for each row.
    """
    # Shift each row so its maximum is 0 before sampling; the row-wise
    # ordering (and therefore the argmax) is unchanged by the shift.
    shifted = logits - tf.reduce_max(logits, [1], keep_dims=True)

    # Plain truthiness instead of `is True` / `is False`: with the identity
    # checks, a flag such as 1, 0 or numpy.bool_ matched neither branch and
    # the raw float logits fell through to squeeze/one_hot.
    if exploration:
        # tf.multinomial returns shape [batch, 1]; drop the sample axis.
        index = tf.squeeze(tf.multinomial(shifted, 1), [1])
    else:
        index = tf.argmax(shifted, 1)

    return tf.one_hot(index, d)
评论列表
文章目录