def main():
game_width = 12
game_height = 9
nb_frames = 4
actions = ((-1, 0), (1, 0), (0, -1), (0, 1), (0, 0))
# Recipe of deep reinforcement learning model
model = Sequential()
model.add(Convolution2D(
16,
nb_row=3,
nb_col=3,
activation='relu',
input_shape=(nb_frames, game_height, game_width)))
model.add(Convolution2D(32, nb_row=3, nb_col=3, activation='relu'))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(len(actions)))
model.compile(RMSprop(), 'MSE')
agent = Agent(
model, nb_frames, snake_game, actions, size=(game_width, game_height))
agent.train(nb_epochs=10000, batch_size=64, gamma=0.8, save_model=True)
agent.play(nb_rounds=10)
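The snippet above uses the Keras 1 Convolution2D(nb_filter, nb_row, nb_col) signature. For reference, a minimal sketch of the same architecture written against the Keras 2 API, assuming keras >= 2 and channels-first input (the frame stack comes first in input_shape); the Agent wiring is unchanged and not repeated:

# Sketch only: the same network expressed with the Keras 2 API (assumed keras >= 2).
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense
from keras.optimizers import RMSprop

def build_model(nb_frames=4, game_height=9, game_width=12, nb_actions=5):
    model = Sequential()
    model.add(Conv2D(16, kernel_size=3, activation='relu',
                     data_format='channels_first',
                     input_shape=(nb_frames, game_height, game_width)))
    model.add(Conv2D(32, kernel_size=3, activation='relu',
                     data_format='channels_first'))
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dense(nb_actions))   # one Q-value per action
    model.compile(RMSprop(), 'mse')
    return model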
Example source code for the Python Agent() class
def agent(machine_manager, volume_manager):
return Agent(machine_manager, volume_manager, {
'agent_ttl': 60,
'max_error_count': 3,
'max_error_timeout': 10,
'node': {
'volume_path': '/mnt',
'conf_path': '/etc/cobalt.conf',
'max_fill': 0.8,
'conf': {
'name': 'test-node',
'labels': ['ssd']
}
},
'watch_timeout': 10
})
def m_agent_service(volume_manager, machine_manager, m_driver, m_node):
agent = Agent(volume_manager, machine_manager, {
'agent_ttl': 60,
'max_error_count': 3,
'max_error_timeout': 10,
'node': {
'volume_path': '/mnt',
'conf_path': '/etc/cobalt.conf',
'max_fill': 0.8,
'conf': {
'name': 'test-node',
'labels': ['ssd']
}
},
'watch_timeout': 10
})
agent._driver = m_driver
agent._node = m_node
return agent
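Both fixtures above take machine_manager and volume_manager as arguments, so those names are themselves pytest fixtures. A minimal, self-contained sketch of that pattern, with MagicMock standing in for the real managers (names illustrative):

# Sketch only: pytest fixtures supplying stand-in managers to the agent fixture.
import pytest
from unittest.mock import MagicMock

@pytest.fixture
def machine_manager():
    return MagicMock(name='machine_manager')

@pytest.fixture
def volume_manager():
    return MagicMock(name='volume_manager')

def test_agent_is_built(agent):
    # the agent fixture wires the mocks and the config dict together
    assert agent is not None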
def thread_agent(self):
"""
This is the entry point for the thread that handles
all communication with the Supernova bus.
It communicates back to the main ConnOps loop via
multithreaded Event objects.
"""
while True:
try:
# Set up the command handlers
self.agent = Agent()
self.agent.service_handler["Telemetry Packet"] = self.on_telemetry
# Run
self.agent.bind_udp_sockets()
self.agent.run() # should never exit
except Exception:
# NOTE: It is an error to ever reach this handler; agent.run() should never return.
# Catch and swallow the exception so the loop can restart the agent.
pass
# NOTE: It is an error to ever reach this line.
self.agent_errors = self.agent_errors + 1
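A minimal sketch of how the main ConnOps object might launch this loop on its own daemon thread, using the standard threading module (the method and attribute names here are illustrative):

# Sketch only: run thread_agent on a daemon thread so it dies with the process.
import threading

def start_agent_thread(self):
    self.agent_errors = 0
    self.agent_thread = threading.Thread(target=self.thread_agent, daemon=True)
    self.agent_thread.start()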
def parse_options():
parser = ArgumentParser("pyofagent - Python-based Open Flow Agent")
parser.add_argument("-c", "--controller", #dest="controller",
help="Controller host:port to connect to", metavar="HOST:PORT",
default="localhost:6633")
parser.add_argument("-d", "--devid", dest="datapath_id",
help="Device identified", metavar="DEVID",
default=42)
parser.add_argument("-v", "--verbose", action='store_true', #dest=verbose,
default="enable verbose logging (log-level is DEBUG)")
parser.add_argument("-I", "--in-out-iface", metavar="IN-OUT-IFACE",
help="Local interface to receve/send in-out frames",)
parser.add_argument("-S", "--in-out-stag", metavar="IN-OUT-STAG",
help="Expect/Apply given s-tag when receiving/sending frames"+
"at the in-out interface")
return parser.parse_args()
def main():
args = parse_options()
logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
store = ObjectStore()
backend = MockBackend(store, in_out_iface=args.in_out_iface,
in_out_stag=None if args.in_out_stag is None else int(args.in_out_stag))
agent = Agent(args.controller, int(args.datapath_id), store, backend)
store.set_agent(agent)
backend.set_agent(agent)
try:
agent.run()
except KeyboardInterrupt:
logging.info("Ctrl-c received! Shutting down connection and exiting...")
agent.stop()
backend.stop()
def run(self):
self.stats.start()
for i in np.arange(FLAGS.nb_concurrent):
self.agents.append(Agent(i, self.prediction_q, self.training_q, self.stats.episode_log_q))
self.agents[-1].start()
for i in np.arange(FLAGS.nb_trainers):
self.trainers.append(Trainer(self, i))
self.trainers[-1].start()
for i in np.arange(FLAGS.nb_predictors):
self.predictors.append(Predictor(self, i))
self.predictors[-1].start()
while True:
if self.stats.episode_count.value % FLAGS.checkpoint_interval == 0:
self.save_model()
time.sleep(0.05)
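This run() method follows the GA3C layout: agent processes push states onto a shared prediction queue and experiences onto a training queue, while predictor and trainer threads drain them. A minimal sketch of the agent side of that protocol, with illustrative message formats and stand-in environment values:

# Sketch only: the queue protocol an agent process follows in a GA3C-style setup.
from multiprocessing import Process, Queue
import random

class SketchAgent(Process):
    def __init__(self, idx, prediction_q, training_q):
        super(SketchAgent, self).__init__()
        self.idx = idx
        self.prediction_q = prediction_q   # agent -> predictor: (id, state)
        self.training_q = training_q       # agent -> trainer: experience tuples
        self.wait_q = Queue(maxsize=1)     # predictor -> agent: (policy, value)

    def run(self):
        state = 0                                    # stand-in for an observation
        while True:
            self.prediction_q.put((self.idx, state))
            policy, value = self.wait_q.get()        # blocks until the predictor replies
            action = random.randrange(len(policy))   # sample from the policy in practice
            reward, next_state, done = 0.0, state + 1, False
            self.training_q.put((state, action, reward, done))
            state = 0 if done else next_state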
def __init__(self, expert, **kwargs):
"""
expert: an Agent implementing the policy-to-be-imitated
The Imitator will sometimes call expert.act() in order to get training data
We promise that the number of calls to expert.act() will be sublinear
in the number of calls to Imitator.act().
Note that each Agent has immutable state,
but calling methods on an Imitator may cause updates to external parameters,
and these parameters may affect the behavior of existing Agent objects
"""
super(Imitator, self).__init__(**kwargs)
self.expert = expert
#the act method is responsible for sometimes calling expert.act() to gather training data
#it is also responsible for updating the agent's parameters
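One way to keep the promise above (expert calls sublinear in the number of act() calls) is to query the expert with a probability that decays with the call count, e.g. 1/sqrt(n). A minimal sketch with illustrative method names:

# Sketch only: expected expert queries grow like sqrt(n), i.e. sublinearly.
import math
import random

class SketchImitator(object):
    def __init__(self, expert):
        self.expert = expert
        self.n_calls = 0

    def act(self, observation):
        self.n_calls += 1
        if random.random() < 1.0 / math.sqrt(self.n_calls):
            label = self.expert.act(observation)     # gather a training example
            self.update_parameters(observation, label)
        return self.predict(observation)

    def update_parameters(self, observation, label):
        pass   # placeholder: fit the imitation policy on (observation, label)

    def predict(self, observation):
        return None   # placeholder: the imitator's own action choice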
def __init__(self, reward, **kwargs):
"""
reward: a function that acts on a pair of observation and action sequences,
and returns a reward in [0, 1]
The agent will sometimes call reward() in order to evaluate a partial trajectory.
Its goal is to maximize the total reward
of all of the complete trajectories produced by calling act().
We say that a trajectory is complete if act() was never called
on the agent's state at the end of the trajectory.
Note that each Agent has immutable state,
but calling methods on an RL agent may cause updates to external parameters,
and these parameters may affect the behavior of existing Agent objects
"""
super(IntrinsicRL, self).__init__(**kwargs)
self.reward = reward
#the act method is responsible for sometimes calling reward() to gather training data,
#and for updating the agent's parameters
def setup_services(self):
"""A method to prepare the possible services that Cobalt can have."""
self._service_endpoints = {
'engine': Engine(self.etcd, self.volume_manager,
self.machine_manager, self.config['engine']),
'api': Api(self.volume_manager, self.config['api']),
'agent': Agent(self.machine_manager, self.volume_manager, self.config['agent'])
}
def __init__(self, agent, player):
self.Agent = agent
self.player = player
self.ACTION_NUM = agent.dim_actions
self.STATE_NUM = agent.dim_states
self.RLMemory_num = 20
self.SLMemory_num = 20
self.RLMemory = deque(maxlen=self.RLMemory_num)
self.SLMemory = deque(maxlen=self.SLMemory_num)
# self.Q = DQN.DQN_DouDiZhu(self.ACTION_NUM, self.STATE_NUM, self.RLMemory, self.RLMemory_num, self.player)
# self.Pi = SLN.Pi(self.ACTION_NUM, self.STATE_NUM, self.SLMemory, self.SLMemory_num, self.player)
self.EPSILON = 0.06
self.ETA = 0.1
self.EPISODE_NUM = 5000000
self.Q_enable = False
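The EPSILON, ETA and paired RL/SL memories follow the Neural Fictitious Self-Play recipe: with probability ETA play an epsilon-greedy best response from the Q network, otherwise play the average policy Pi. A minimal sketch of that selection rule (Q.value and Pi.distribution are illustrative interfaces, not part of the snippet above):

# Sketch only: NFSP-style action selection mixing best response and average policy.
import random

def choose_action(self, state, legal_actions):
    if random.random() < self.ETA:
        # best-response mode: epsilon-greedy over Q-values
        if random.random() < self.EPSILON:
            return random.choice(legal_actions)
        return max(legal_actions, key=lambda a: self.Q.value(state, a))
    # average-policy mode: sample an action from Pi's distribution
    probs = self.Pi.distribution(state, legal_actions)
    return random.choices(legal_actions, weights=probs, k=1)[0]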
def process(self, request, headers=None):
""" Processes the API.ai request and return an agent with the action
performed in it
Args:
request (:obj:`dict`): request received from API.ai
headers (:obj:`dict`, optional): headers of the HTTP request to verify it
Returns:
:obj:`apiai_assistant.agent.Agent`: Agent instance with the action performed in it
"""
agent_instance = agent.Agent(
corpus=self.corpus,
request=request,
ssml=self._ssml
)
if headers and type(headers) is dict:
h_magic_key = headers.get('magic-key')
if h_magic_key and self.magic_key and h_magic_key != self.magic_key:
agent_instance.error('Could not verify request',
code=agent.Status.AccessDenied)
return agent_instance
if self.validate(agent_instance):
action = self.action_map[agent_instance.parser.action]
action(agent_instance)
logging.debug('- Response: {}'.format(
agent_instance.response.to_dict()))
return agent_instance
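For instance, when the assistant was created with a magic_key, a request carrying a different magic-key header is rejected before any action runs. An illustrative call, where assistant stands for the object exposing process() and the request body is a placeholder rather than a full API.ai payload:

# Sketch only: a mismatched magic-key header short-circuits into AccessDenied.
agent_instance = assistant.process(
    request={'result': {'action': 'get_weather'}},   # placeholder payload
    headers={'magic-key': 'wrong-key'})
# agent_instance now carries the 'Could not verify request' error response.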
def main_train(tf_configs=None):
s_t = time.time()
tf.reset_default_graph()
if not os.path.exists(model_path):
os.makedirs(model_path)
global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
with tf.device('/gpu:2'):
optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-6)
master_network = network.ACNetwork('global', optimizer, shape=cfg.new_img_dim) # Generate global network
num_workers = 16
agents = []
# Create worker classes
for i in range(num_workers):
agents.append(agent.Agent(DoomGame(), i, s_size, a_size, optimizer, model_path, global_episodes))
saver = tf.train.Saver(max_to_keep=100)
with tf.Session(config=tf_configs) as sess:
coord = tf.train.Coordinator()
if load_model:
print('Loading Model...')
ckpt = tf.train.get_checkpoint_state(model_path)
saver.restore(sess, ckpt.model_checkpoint_path)
else:
sess.run(tf.global_variables_initializer())
# This is where the asynchronous magic happens.
# Start the "work" process for each worker in a separate threat.
worker_threads = []
for ag in agents:
agent_train = lambda ag=ag: ag.train_a3c(max_episode_length, gamma, sess, coord, saver)  # bind ag now, not at call time
t = threading.Thread(target=(agent_train))
t.start()
time.sleep(0.5)
worker_threads.append(t)
coord.join(worker_threads)
print("training ends, costs{}".format(time.time() - s_t))
def main_play(tf_configs=None):
tf.reset_default_graph()
with tf.Session(config=tf_configs) as sess:
ag = agent.Agent(DoomGame(), 0, s_size, a_size, play=True)
print('Loading Model...')
saver = tf.train.Saver()
ckpt = tf.train.get_checkpoint_state(model_path)
saver.restore(sess, os.path.join(model_path, 'model-200.ckpt'))
print('Successfully loaded!')
ag.play_game(sess, 10)
def main_train(tf_configs=None):
s_t = time.time()
tf.reset_default_graph()
if not os.path.exists(cfg.model_path):
os.makedirs(cfg.model_path)
global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
with tf.device("/gpu:0"):
optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-5)
global_network = network.ACNetwork('global', optimizer, img_shape=cfg.IMG_SHAPE)
num_workers = cfg.AGENTS_NUM
agents = []
# Create worker classes
for i in range(num_workers):
agents.append(agent.Agent(DoomGame(), i, s_size, a_size, optimizer, cfg.model_path, global_episodes))
saver = tf.train.Saver(max_to_keep=100)
with tf.Session(config=tf_configs) as sess:
coord = tf.train.Coordinator()
if load_model:
print('Loading Model...')
ckpt = tf.train.get_checkpoint_state(cfg.model_path)
saver.restore(sess, ckpt.model_checkpoint_path)
else:
sess.run(tf.global_variables_initializer())
# This is where the asynchronous magic happens.
# Start the "work" process for each worker in a separate threat.
worker_threads = []
for ag in agents:
agent_train = lambda ag=ag: ag.train_a3c(max_episode_length, gamma, sess, coord, saver)  # bind ag now, not at call time
t = threading.Thread(target=(agent_train))
t.start()
time.sleep(0.5)
worker_threads.append(t)
coord.join(worker_threads)
print("training ends, costs{}".format(time.time() - s_t))
def main_play(tf_configs=None):
tf.reset_default_graph()
with tf.Session(config=tf_configs) as sess:
ag = agent.Agent(DoomGame(), 0, play=True)
print('Loading Model...')
saver = tf.train.Saver()
ckpt = tf.train.get_checkpoint_state(cfg.model_path)
saver.restore(sess, os.path.join(cfg.model_path, cfg.model_file))
print('Successfully loaded!')
ag.play_game(sess, 10)
def main_train(tf_configs=None):
s_t = time.time()
tf.reset_default_graph()
if not os.path.exists(model_path):
os.makedirs(model_path)
with tf.device("/cpu:0"):
global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-5)
master_network = network.ACNetwork('global', optimizer) # Generate global network
num_workers = 16
agents = []
# Create worker classes
for i in range(num_workers):
agents.append(agent.Agent(DoomGame(), i, s_size, a_size, optimizer, model_path, global_episodes))
saver = tf.train.Saver(max_to_keep=100)
with tf.Session(config=tf_configs) as sess:
coord = tf.train.Coordinator()
if load_model:
print('Loading Model...')
ckpt = tf.train.get_checkpoint_state(model_path)
saver.restore(sess, ckpt.model_checkpoint_path)
else:
sess.run(tf.global_variables_initializer())
# This is where the asynchronous magic happens.
# Start the "work" process for each worker in a separate threat.
worker_threads = []
for ag in agents:
agent_train = lambda ag=ag: ag.train_a3c(max_episode_length, gamma, sess, coord, saver)  # bind ag now, not at call time
t = threading.Thread(target=(agent_train))
t.start()
time.sleep(0.5)
worker_threads.append(t)
coord.join(worker_threads)
print("training ends, costs{}".format(time.time() - s_t))
def main_train(tf_configs=None):
s_t = time.time()
tf.reset_default_graph()
if not os.path.exists(cfg.model_path):
os.makedirs(cfg.model_path)
global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
with tf.device("/gpu:0"):
optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-5)
global_network = network.ACNetwork('global', optimizer, img_shape=cfg.IMG_SHAPE)
num_workers = cfg.AGENTS_NUM
agents = []
# Create worker classes
for i in range(num_workers):
agents.append(agent.Agent(DoomGame(), i, optimizer, cfg.model_path, global_episodes, task_name='D3_battle'))
saver = tf.train.Saver(max_to_keep=100)
with tf.Session(config=tf_configs) as sess:
coord = tf.train.Coordinator()
if load_model:
print('Loading Model...')
ckpt = tf.train.get_checkpoint_state(cfg.model_path)
saver.restore(sess, ckpt.model_checkpoint_path)
else:
sess.run(tf.global_variables_initializer())
# This is where the asynchronous magic happens.
# Start the "work" process for each worker in a separate threat.
worker_threads = []
for ag in agents:
agent_train = lambda ag=ag: ag.train_a3c(max_episode_length, gamma, sess, coord, saver)  # bind ag now, not at call time
t = threading.Thread(target=(agent_train))
t.start()
time.sleep(0.5)
worker_threads.append(t)
coord.join(worker_threads)
print("training ends, costs{}".format(time.time() - s_t))
def main_play(tf_configs=None):
tf.reset_default_graph()
with tf.Session(config=tf_configs) as sess:
ag = agent.Agent(DoomGame(), 0, optimizer=None, play=True)
print('Loading Model...')
saver = tf.train.Saver()
ckpt = tf.train.get_checkpoint_state(cfg.model_path)
saver.restore(sess, os.path.join(cfg.model_path, cfg.model_file))
print('Successfully loaded the model, now time to play the game...')
time.sleep(3)
ag.play_game(sess, 10)
def __init__(self):
""" Constructor """
self.cmds = self.create_payload_cmd_handler()
self.agent = Agent()
self.agent.service_handler["Payload Command"] = self.cmds.dispatch
self.capture_proc = None
def __init__(self, consul_endpoint, vcore_endpoint, controller_endpoints,
enable_tls=False, key_file=None, cert_file=None,
vcore_retry_interval=0.5, devices_refresh_interval=5,
subscription_refresh_interval=5):
log.info('init-connection-manager')
log.info('list-of-controllers', controller_endpoints=controller_endpoints)
self.controller_endpoints = controller_endpoints
self.consul_endpoint = consul_endpoint
self.vcore_endpoint = vcore_endpoint
self.enable_tls = enable_tls
self.key_file = key_file
self.cert_file = cert_file
self.channel = None
self.grpc_client = None # single, shared gRPC client to vcore
self.agent_map = {} # (datapath_id, controller_endpoint) -> Agent()
self.device_id_to_datapath_id_map = {}
self.vcore_retry_interval = vcore_retry_interval
self.devices_refresh_interval = devices_refresh_interval
self.subscription_refresh_interval = subscription_refresh_interval
self.subscription = None
self.running = False
def create_agent(self, device):
datapath_id = device.datapath_id
device_id = device.id
for controller_endpoint in self.controller_endpoints:
agent = Agent(controller_endpoint, datapath_id,
device_id, self.grpc_client, self.enable_tls,
self.key_file, self.cert_file)
agent.start()
self.agent_map[(datapath_id,controller_endpoint)] = agent
self.device_id_to_datapath_id_map[device_id] = datapath_id
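The inverse path, when a device disappears, is to stop and forget every agent keyed by its datapath id. A minimal sketch against the two maps populated above; the method name delete_agent and the agent.stop() call are assumptions here, not taken from the snippet:

# Sketch only: tear down the per-controller agents created for one device.
def delete_agent(self, datapath_id):
    for controller_endpoint in self.controller_endpoints:
        agent = self.agent_map.pop((datapath_id, controller_endpoint), None)
        if agent is not None:
            agent.stop()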
def run(settings):
recreate_subdirectory_structure(settings)
tf.reset_default_graph()
with tf.device("/cpu:0"):
global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
global_network = ACNetwork('global', None)
num_agents = 1
agents = []
envs = []
for i in range(num_agents):
if settings["game"] == '11arms':
this_env = ElevenArms()
else:
this_env = TwoArms(settings["game"])
envs.append(this_env)
for i in range(num_agents):
agents.append(Agent(envs[i], i, optimizer, global_step, settings))
saver = tf.train.Saver(max_to_keep=5)
with tf.Session() as sess:
coord = tf.train.Coordinator()
ckpt = tf.train.get_checkpoint_state(settings["load_from"])
print("Loading Model from {}".format(ckpt.model_checkpoint_path))
saver.restore(sess, ckpt.model_checkpoint_path)
agent_threads = []
for agent in agents:
agent_play = lambda agent=agent: agent.play(sess, coord, saver)  # bind agent now, not at call time
thread = threading.Thread(target=agent_play)
thread.start()
agent_threads.append(thread)
coord.join(agent_threads)
def run(settings):
tf.reset_default_graph()
with tf.device("/cpu:0"):
global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
global_network = ACNetwork('global', None)
num_agents = 1
agents = []
envs = []
for i in range(num_agents):
if settings["game"] == '11arms':
this_env = ElevenArms()
else:
this_env = TwoArms(settings["game"])
envs.append(this_env)
for i in range(num_agents):
agents.append(Agent(envs[i], i, optimizer, global_step, settings))
saver = tf.train.Saver(max_to_keep=5)
with tf.Session() as sess:
coord = tf.train.Coordinator()
if FLAGS.resume:
ckpt = tf.train.get_checkpoint_state(settings["load_from"])
print("Loading Model from {}".format(ckpt.model_checkpoint_path))
saver.restore(sess, ckpt.model_checkpoint_path)
else:
sess.run(tf.global_variables_initializer())
agent_threads = []
for agent in agents:
agent_play = lambda agent=agent: agent.play(sess, coord, saver)  # bind agent now, not at call time
thread = threading.Thread(target=agent_play)
thread.start()
agent_threads.append(thread)
coord.join(agent_threads)
def run(settings):
recreate_subdirectory_structure(settings)
tf.reset_default_graph()
with tf.device("/cpu:0"):
global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
global_network = ACNetwork('global', None)
num_agents = 1
agents = []
envs = []
for i in range(num_agents):
if settings["game"] == '11arms':
this_env = ElevenArms()
else:
this_env = TwoArms(settings["game"])
envs.append(this_env)
for i in range(num_agents):
agents.append(Agent(envs[i], i, optimizer, global_step, settings))
saver = tf.train.Saver(max_to_keep=5)
with tf.Session() as sess:
coord = tf.train.Coordinator()
if FLAGS.resume:
ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"])
print("Loading Model from {}".format(ckpt.model_checkpoint_path))
saver.restore(sess, ckpt.model_checkpoint_path)
else:
sess.run(tf.global_variables_initializer())
agent_threads = []
for agent in agents:
agent_play = lambda agent=agent: agent.play(sess, coord, saver)  # bind agent now, not at call time
thread = threading.Thread(target=agent_play)
thread.start()
agent_threads.append(thread)
coord.join(agent_threads)
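Across these run() variants, settings is read for only a handful of keys. An illustrative dict covering the keys used directly in run() (recreate_subdirectory_structure may read more):

# Sketch only: the settings keys these run() variants consume directly.
settings = {
    'lr': 1e-4,                         # Adam learning rate
    'game': '11arms',                   # '11arms' -> ElevenArms, anything else -> TwoArms
    'load_from': './checkpoints',       # restore directory for the play-only variants
    'checkpoint_dir': './checkpoints',  # restore directory when FLAGS.resume is set
}
run(settings)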