def __init__(self, actions):
    """Initialise agent state and build the TensorFlow training graph.

    Creates two ``QNet`` instances (current and target), placeholders for
    the one-hot action mask and the target values, a Huber-style loss on
    the Q-value of the selected action, and an RMSProp training op that
    minimises the mean of that loss.

    Args:
        actions: Sized collection of available actions. It is stored on
            the instance, and its length sizes both networks and the
            one-hot action placeholder.
    """
    # Plain bookkeeping state.
    self.replayMemory = deque()
    self.timeStep = 0
    self.epsilon = INITIAL_EPSILON
    self.actions = actions
    self.files = 0  # NOTE(review): purpose not visible in this method

    # Networks are created in this order (current first, then target).
    num_actions = len(actions)
    self.currentQNet = QNet(num_actions)
    # Not referenced again in this method; presumably used for target
    # value computation elsewhere -- confirm against the rest of the class.
    self.targetQNet = QNet(num_actions)

    # Graph inputs: one-hot encoded actions and per-sample target values.
    self.actionInput = tf.placeholder(
        "float", [None, num_actions], name="actions_one_hot")
    self.yInput = tf.placeholder("float", [None], name="y")

    # Select the Q-value of the taken action by masking with the one-hot
    # input and summing across axis 1.
    # presumably QValue is (batch, len(actions)) -- confirm against QNet.
    self.action_mask = tf.multiply(self.currentQNet.QValue, self.actionInput)
    self.Q_action = tf.reduce_sum(self.action_mask, axis=1)

    # Huber loss with threshold 1.0 (quadratic for |delta| < 1, linear
    # beyond) rather than a plain squared error.
    delta = tf.subtract(self.Q_action, self.yInput)
    self.delta = delta
    abs_delta = tf.abs(delta)
    self.loss = tf.where(
        abs_delta < 1.0,
        0.5 * tf.square(delta),
        abs_delta - 0.5,
    )
    self.cost = tf.reduce_mean(self.loss)

    optimizer = tf.train.RMSPropOptimizer(
        learning_rate=RMS_LEARNING_RATE,
        momentum=RMS_MOMENTUM,
        epsilon=RMS_EPSILON,
        decay=RMS_DECAY,
    )
    self.trainStep = optimizer.minimize(self.cost)
#
评论列表
文章目录