def get_greedy_action(Q, obs, show_f=False):
    """Pick the index of the largest predicted value for one observation.

    The observation is converted to a float32 array and given a leading
    axis of length 1, passed through ``Q.feature`` and then ``Q.predict``
    with backprop disabled, and the argmax over element 0 of that
    prediction is returned.

    Args:
        Q: Model exposing ``xp`` (its array module), ``feature`` and
            ``predict`` -- presumably a Chainer link; confirm with caller.
        obs: A single observation accepted by ``Q.xp.asarray``.
        show_f: When true, the intermediate feature is handed to
            ``show_feature``.

    Returns:
        int: Position of the maximum entry in the prediction for ``obs``.
    """
    backend = Q.xp
    single = backend.asarray(obs, dtype=np.float32)
    batch = backend.expand_dims(single, 0)

    # Forward pass only, so run it with backprop mode switched off.
    with chainer.no_backprop_mode():
        feature = Q.feature(batch)
        scores = Q.predict(feature)[0]

    if show_f:
        show_feature(feature)

    best = backend.argmax(scores)
    return int(best)
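# NOTE: web-page navigation residue from the source page, not code:
# "评论列表" (comment list), "文章目录" (table of contents).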