def run(self, train=True, movie=False, enableLog=False):
    """Run one 300-step episode and return its total reward.

    Args:
        train: when True, update the agent's model and decay epsilon at
            the end of the episode.
        movie: when True, render each step as SVG in the notebook
            (requires IPython `display`).
        enableLog: when True, append per-step records to ``self.log`` and
            also return the log.

    Returns:
        ``total_reward`` — or ``(total_reward, self.log)`` if
        ``enableLog`` is True.
    """
    # Reset environment and the observation sequence buffer.
    self.env.reset(0, 0)
    self.reset_seq()
    total_reward = 0

    for _ in range(300):  # fixed episode length
        # Snapshot the state sequence *before* acting.
        old_seq = self.seq.copy()

        # Choose an action (epsilon-greedy when train=True — assumed;
        # depends on agent implementation).
        action = self.agent.get_action(old_seq, train)

        # Apply the action and collect the reward.
        self.env.update_state(action)
        reward = self.env.get_reward()
        total_reward += reward

        # Push the new state and snapshot the sequence *after* acting.
        state = self.env.get_state()
        self.push_seq(state)
        new_seq = self.seq.copy()

        # Store the (s, a, r, s') transition in the agent's local memory.
        self.agent.experience_local(old_seq, action, reward, new_seq)

        if enableLog:
            self.log.append(np.hstack([old_seq[0], action, reward]))

        # Optional notebook animation of the environment.
        if movie:
            display.clear_output(wait=True)
            display.display(self.env.get_svg())
            time.sleep(0.01)

    # Episode finished: record the episode-level outcome.
    self.agent.experience_global(total_reward)

    if train:
        # Learn from the final transition and decay exploration rate.
        # NOTE(review): only the *last* (old_seq, action, reward, new_seq)
        # is passed here — presumably update_model replays stored
        # experience internally; confirm against the agent class.
        self.agent.update_model(old_seq, action, reward, new_seq)
        self.agent.reduce_epsilon()

    if enableLog:
        return total_reward, self.log
    return total_reward
# (removed web-scrape page residue that followed the code: "评论列表" /
#  "文章目录" — "comment list" / "table of contents"; not part of the program)