def __init__(self):
    """Set up agent bookkeeping, build the TensorFlow graph and open a session.

    Reads the module-level names START_EPSILON, input_dim, num_output and
    dim. The construction order of variables and placeholders is kept
    deliberately: TensorFlow auto-generates op/variable names from creation
    order, and tf.train.Saver stores variables under those names.
    """
    def gaussian_variable(shape):
        # Variable of the given shape initialised from tf.random_normal.
        return tf.Variable(tf.random_normal(shape))

    # --- agent bookkeeping -------------------------------------------------
    self.replay_buffer = deque()
    self.time_step = 0
    self.epsilon = START_EPSILON
    self.state_dim = input_dim
    self.action_dim = num_output

    # --- deep Q-net parameters ---------------------------------------------
    # NOTE(review): the 4-D shapes look like conv kernels laid out as
    # [height, width, in_channels, out_channels] -- confirm against
    # deep_q_network, which is not visible here.
    weight_shapes = (
        ('w1', [3, 3, 2, 150]),
        ('w2', [1, 1, 150, 1]),
        ('w3', [3, 3, 1, 10]),
        ('out', [dim * dim * 10, num_output]),
    )
    bias_shapes = (
        ('b1', [150]),
        ('b2', [1]),
        ('b3', [10]),
        ('out', [num_output]),
    )
    # Weights are created before biases, each in the listed order.
    self.weights = {
        key: gaussian_variable(shape) for key, shape in weight_shapes
    }
    self.biases = {
        key: gaussian_variable(shape) for key, shape in bias_shapes
    }

    # --- graph inputs ------------------------------------------------------
    # State batch: one row per cell of the state_dim[0] x state_dim[1] grid,
    # with 2 values per cell.
    flat_cells = self.state_dim[0] * self.state_dim[1]
    self.state_input = tf.placeholder("float", [None, flat_cells, 2])
    # Dropout keep probability.
    # NOTE(review): this placeholder is only held in a local; any code that
    # needs to feed it must obtain the handle through deep_q_network --
    # confirm that it is stored there.
    keep_prob = tf.placeholder(tf.float32)

    # --- network and training ops ------------------------------------------
    self.deep_q_network(
        self.state_input, self.weights, self.biases, keep_prob
    )
    # Defined outside this view; presumably adds the loss/optimizer ops.
    self.training_rules()

    # --- session -----------------------------------------------------------
    self.session = tf.InteractiveSession()
    # NOTE(review): tf.initialize_all_variables is deprecated in later
    # TF 1.x releases in favour of tf.global_variables_initializer; kept
    # as-is here to stay compatible with the TF version this file targets.
    init_op = tf.initialize_all_variables()
    self.session.run(init_op)

    # Checkpoint saver, created once every variable exists.
    self.saver = tf.train.Saver()
# dqn_8_exp.py -- file source code
# python
# Views: 26
# Favorites: 0
# Likes: 0
# Comments: 0
# Comment list
# Table of contents