DQN_J2.py 文件源码-python代码片段

DQN_J2.py 文件源码

python

阅读 30 收藏 0 点赞 0 评论 0

项目：OpenAI_Challenges 作者: AlwaysLearningDeeper 项目源码文件源码

def __init__(self, actions):
        """Initialise agent state and build the TensorFlow training graph.

        Args:
            actions: Sized collection of the available actions. Only its
                length is used here (to size both networks and the one-hot
                action placeholder); the object itself is kept on
                ``self.actions``.
        """
        # Plain Python bookkeeping.
        self.replayMemory = deque()  # unbounded here; no maxlen is set
        self.timeStep = 0
        self.epsilon = INITIAL_EPSILON
        self.actions = actions
        self.files = 0  # NOTE(review): purpose not visible in this method

        # Two independently constructed networks with one output per action.
        # Presumably targetQNet is the periodically-synced target network;
        # the sync itself is not part of this method.
        num_actions = len(actions)
        self.currentQNet = QNet(num_actions)
        self.targetQNet = QNet(num_actions)

        # Feeds: a one-hot mask of the action taken and the regression target.
        self.actionInput = tf.placeholder(
            tf.float32, [None, num_actions], name="actions_one_hot")
        self.yInput = tf.placeholder(tf.float32, [None], name="y")

        # Multiplying by the one-hot mask and summing over the action axis
        # leaves only the Q-value of the selected action for each row.
        # ``axis`` replaces the deprecated ``reduction_indices`` keyword.
        self.action_mask = tf.multiply(self.currentQNet.QValue, self.actionInput)
        self.Q_action = tf.reduce_sum(self.action_mask, axis=1)

        # Prediction error against the supplied target.
        self.delta = tf.subtract(self.Q_action, self.yInput)

        # Huber loss with threshold 1.0: quadratic for small errors, linear
        # for large ones, so outliers produce bounded gradients (unlike a
        # plain squared error).
        abs_delta = tf.abs(self.delta)
        self.loss = tf.where(
            abs_delta < 1.0,
            0.5 * tf.square(self.delta),
            abs_delta - 0.5,
        )

        self.cost = tf.reduce_mean(self.loss)
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=RMS_LEARNING_RATE,
            momentum=RMS_MOMENTUM,
            epsilon=RMS_EPSILON,
            decay=RMS_DECAY,
        )
        self.trainStep = optimizer.minimize(self.cost)
        #