def well_formed(self):
    return (
        isinstance(self.H, Agent) and
        isinstance(self.child_base, BudgetedHCH) and
        areinstances(self.args, Referent)
    )
def well_formed(self):
    return (
        isinstance(self.transcript, tuple) and
        all(hashable(x) for x in self.transcript) and
        isinstance(self.transcript_hash, six.string_types) and
        isinstance(self.agent, Agent) and
        self.agent.state_free
    )
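# Note: areinstances() and hashable() are helpers used by the two well_formed()
# methods above but are not shown on this page. A minimal sketch of what they
# might look like (an assumption, not the original implementations):
def areinstances(items, cls):
    """Return True if every element of items is an instance of cls."""
    return all(isinstance(x, cls) for x in items)


def hashable(x):
    """Return True if x can be hashed (i.e. used in sets or as a dict key)."""
    try:
        hash(x)
        return True
    except TypeError:
        return False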
def run_agent(save_path, T, game_name):
    with tf.Session() as sess:
        agent = Agent(session=sess, observation_shape=(210, 160, 3),
                      action_size=3,
                      optimizer=tf.train.AdamOptimizer(1e-4))
        # Create a saver and restore the checkpoint saved at step T.
        saver = tf.train.Saver()
        saver.restore(sess, save_path + '-' + str(T))
        play(agent, game_name)
        return sess, agent
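# Note: the '<save_path>-<T>' prefix restored above is the naming convention
# tf.train.Saver uses when a checkpoint is written with a global step, e.g.
# saver.save(sess, save_path, global_step=T) on the training side (assumed
# here, not shown in this snippet).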
def main():
    """Main function.

    Test the agent from a saved checkpoint.
    """
    simulator_config = 'config/simulator.json'
    print('Starting simulator...')
    simulator = DoomSimulator(simulator_config)
    simulator.add_bots(10)
    print('Simulator started!')
    agent_config = 'config/agent.json'
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    print('Creating agent...')
    ag = Agent(sess, agent_config, simulator)
    print('Loading model...')
    loadstatus = ag.load('./checkpoints')
    if not loadstatus:
        raise IOError('Could not load a checkpoint from ./checkpoints')
    img_buffer = np.zeros((ag.history_length, simulator.num_channels, simulator.resolution[1], simulator.resolution[0]))
    measure_buffer = np.zeros((ag.history_length, simulator.num_measure))
    curr_step = 0
    term = False
    # Prefixes (first 6 action flags) in which forward/backward cancel out,
    # left/right cancel out and ATTACK is off; these "do nothing" actions are
    # swapped for replacement_act below.
    acts_to_replace = [a + b + c + d for a in [[0, 0], [1, 1]]
                       for b in [[0, 0], [1, 1]] for c in [[0]] for d in [[0], [1]]]
    replacement_act = [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
    # Action 0-5: MOVE_FORWARD MOVE_BACKWARD TURN_LEFT TURN_RIGHT ATTACK SPEED
    # Action 6-11: SELECT_WEAPON2 ~ SELECT_WEAPON7
    while not term:
        if curr_step < ag.history_length:
            img, meas, reward, term = simulator.step(np.squeeze(ag.random_actions(1)).tolist())
        else:
            state_imgs = img_buffer[np.arange(curr_step - ag.history_length, curr_step) % ag.history_length]
            state_imgs = np.reshape(state_imgs, (1,) + ag.get_img_shape())
            state_imgs = np.transpose(state_imgs, [0, 2, 3, 1])
            state_meas = measure_buffer[np.arange(curr_step - ag.history_length, curr_step) % ag.history_length]
            state_meas = np.reshape(state_meas, (1, ag.history_length * simulator.num_measure))
            curr_act = np.squeeze(ag.act(state_imgs, state_meas, ag.test_objective_params)[0]).tolist()
            if curr_act[:6] in acts_to_replace:
                curr_act = replacement_act
            img, meas, reward, term = simulator.step(curr_act)
        if meas is not None and meas[0] > 30.:
            meas[0] = 30.
        simulator.show_info()
        sleep(0.02)
        if not term:
            img_buffer[curr_step % ag.history_length] = img
            measure_buffer[curr_step % ag.history_length] = meas
        curr_step += 1
    simulator.close_game()
def validate(self, agent_instance):
    """ Validate an agent instance and update its code and error_message
    if the agent instance is not valid.

    Args:
        agent_instance (:obj:`apiai_assistant.agent.Agent`): agent instance

    Returns:
        bool: True if valid, False otherwise.
    """
    if not agent_instance.parser:
        agent_instance.error('Could not instantiate parser',
                             code=agent.Status.InvalidData)
        return False

    if not agent_instance.parser.is_valid:
        agent_instance.error('Could not validate data',
                             code=agent.Status.InvalidData)
        return False

    logging.debug("""
    - Actions: {actions}
    - Action: {action}""".format(
        actions=self.action_map.keys(),
        action=agent_instance.parser.action))

    if (not agent_instance.parser.action
            or agent_instance.parser.action not in self.action_map):
        agent_instance.error('Could not understand action',
                             code=agent.Status.InvalidData)
        return False

    logging.debug("""
    - HTTP Request: {data}
    - API.AI Request: {request}
    - Agent: {code} {message}
    - Valid: {valid}""".format(
        data=agent_instance.parser.data,
        request=agent_instance.parser.request,
        code=agent_instance.code,
        message=agent_instance.error_message,
        valid=agent_instance.code == agent.Status.OK))

    return True
def run(sc):
    ts = datetime.datetime.now().replace(microsecond=0).isoformat('_')
    sc.sim_time_start = ts
    logger.setup(sc)
    store_scenario(sc)
    INFO('Init (%s)' % ts)
    INFO('Fitness (minimal): %.6f' % sc.sol_fitness_min)
    INFO('Fitness (maximal): %.6f' % sc.sol_fitness_max)
    INFO('Fitness (average): %.6f' % sc.sol_fitness_avg)
    INFO('Creating %d agents' % sc.opt_m)
    agents = dict()
    for aid, search_space, initial_value in zip(
            sc.agent_ids, sc.agent_search_spaces, sc.agent_initial_values):
        agents[aid] = Agent(aid, search_space, initial_value)
    INFO('Connecting agents')
    for a, neighbors in sc.network.items():
        for n in neighbors:
            # Consistency check
            assert a != n, 'cannot add myself as neighbor!'
            # Add neighbor
            DEBUG('', 'Connecting', a, '->', n)
            if n not in agents[a].neighbors:
                agents[a].neighbors[n] = agents[n]
            else:
                WARNING(n, 'is already neighbor of', a)
    INFO('Starting simulation')
    mas = Mas(sc, agents)
    logger.set_mas(mas)
    stats = Stats(sc, agents)
    stats.eval(mas.current_time)
    AGENT(mas.aid, 'Notifying initial agent (%s)' % sc.sim_initial_agent)
    kappa = Working_Memory(sc.objective, dict(),
                           Solution_Candidate(None, dict(), float('-inf')))
    msg = Message(mas.aid, sc.sim_initial_agent, kappa)
    mas.msg(msg)
    while mas.is_active():
        mas.step()
        stats.eval(mas.current_time)
    if not stats.is_converged():
        ERROR('convergence not reached!')
    ts = datetime.datetime.now().replace(microsecond=0).isoformat('_')
    INFO('End (%s)' % ts)
    # Store scenario again, this time with simulation result
    store_scenario(sc, overwrite=True)
    return stats
def run(settings):
    recreate_subdirectory_structure(settings)
    tf.reset_default_graph()
    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)
        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)
        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            if FLAGS.hypertune:
                ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"])
            else:
                ckpt = tf.train.get_checkpoint_state(settings["load_from"])
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())
        agent_threads = []
        for agent in agents:
            # Bind the current agent as a default argument; a bare
            # `lambda: agent.play(...)` late-binds and every thread would
            # end up running the last agent in the list.
            agent_play = lambda agent=agent: agent.play(sess, coord, saver)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
def run():
    recreate_directory_structure()
    tf.reset_default_graph()
    sess = tf.Session()
    # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
    with sess:
        with tf.device("/cpu:0"):
            global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
            optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
            # num_agents = multiprocessing.cpu_count()
            num_agents = FLAGS.nb_concurrent
            agents = []
            envs = []
            for i in range(num_agents):
                gym_env = gym.make(FLAGS.game)
                # if FLAGS.monitor:
                #     gym_env = gym.wrappers.Monitor(gym_env, FLAGS.experiments_dir + '/worker_{}'.format(i), force=True)
                if FLAGS.game not in flags.SUPPORTED_ENVS:
                    gym_env = atari_environment.AtariEnvironment(gym_env=gym_env, resized_width=FLAGS.resized_width,
                                                                 resized_height=FLAGS.resized_height,
                                                                 agent_history_length=FLAGS.agent_history_length)
                    FLAGS.nb_actions = len(gym_env.gym_actions)
                envs.append(gym_env)
            global_network = FUNNetwork('global', None)
            for i in range(num_agents):
                agents.append(Agent(envs[i], i, optimizer, global_step))
            saver = tf.train.Saver(max_to_keep=5)
            coord = tf.train.Coordinator()
            if FLAGS.resume:
                ckpt = tf.train.get_checkpoint_state(os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name))
                print("Loading Model from {}".format(ckpt.model_checkpoint_path))
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                sess.run(tf.global_variables_initializer())
            agent_threads = []
            for agent in agents:
                # Default-argument binding avoids the late-binding lambda
                # pitfall (all threads sharing the last agent in the list).
                thread = threading.Thread(target=(lambda agent=agent: agent.play(sess, coord, saver)))
                thread.start()
                agent_threads.append(thread)
            while True:
                if FLAGS.show_training:
                    for env in envs:
                        # time.sleep(1)
                        # with main_lock:
                        env.render()
            coord.join(agent_threads)
def run():
    recreate_directory_structure()
    tf.reset_default_graph()
    sess = tf.Session()
    # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
    with sess:
        with tf.device("/cpu:0"):
            global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
            optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
            if FLAGS.use_conv:
                global_network = ConvNetwork('global', None)
            else:
                global_network = ACNetwork('global', None)
            # num_agents = multiprocessing.cpu_count()
            num_agents = FLAGS.nb_concurrent
            agents = []
            envs = []
            for i in range(num_agents):
                gym_env = gym.make(FLAGS.game)
                # if FLAGS.monitor:
                #     gym_env = gym.wrappers.Monitor(gym_env, FLAGS.experiments_dir + '/worker_{}'.format(i), force=True)
                envs.append(gym_env)
            for i in range(num_agents):
                agents.append(Agent(envs[i], i, optimizer, global_step))
            saver = tf.train.Saver(max_to_keep=5)
            coord = tf.train.Coordinator()
            if FLAGS.resume:
                ckpt = tf.train.get_checkpoint_state(os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name))
                print("Loading Model from {}".format(ckpt.model_checkpoint_path))
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                sess.run(tf.global_variables_initializer())
            agent_threads = []
            for agent in agents:
                # Default-argument binding avoids the late-binding lambda pitfall.
                thread = threading.Thread(target=(lambda agent=agent: agent.play(sess, coord, saver)))
                thread.start()
                agent_threads.append(thread)
            while True:
                if FLAGS.show_training:
                    for env in envs:
                        # time.sleep(1)
                        # with main_lock:
                        env.render()
            coord.join(agent_threads)
def run(settings):
    recreate_subdirectory_structure(settings)
    tf.reset_default_graph()
    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)
        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)
        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"])
            # print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            try:
                saver.restore(sess, ckpt.model_checkpoint_path)
            except Exception as e:
                print(sys.exc_info()[0])
                print(e)
        else:
            sess.run(tf.global_variables_initializer())
        agent_threads = []
        for agent in agents:
            # Bind the current agent as a default argument to avoid the
            # late-binding lambda pitfall.
            agent_play = lambda agent=agent: agent.play(sess, coord, saver)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
def a3c(game_name, num_threads=8, restore=None, save_path='model'):
    processes = []
    envs = []
    for _ in range(num_threads + 1):
        gym_env = gym.make(game_name)
        if game_name == 'CartPole-v0':
            env = CustomGymClassicControl(game_name)
        else:
            print("Assuming ATARI game and playing with pixels")
            env = CustomGym(game_name)
        envs.append(env)
    # Separate out the evaluation environment
    evaluation_env = envs[0]
    envs = envs[1:]
    with tf.Session() as sess:
        agent = Agent(session=sess,
                      action_size=envs[0].action_size, model='mnih',
                      optimizer=tf.train.AdamOptimizer(INITIAL_LEARNING_RATE))
        # Create a saver, and only keep 2 checkpoints.
        saver = tf.train.Saver(max_to_keep=2)
        T_queue = Queue.Queue()
        # Either restore the parameters or don't.
        if restore is not None:
            saver.restore(sess, save_path + '-' + str(restore))
            last_T = restore
            print("T was: {}".format(last_T))
            T_queue.put(last_T)
        else:
            sess.run(tf.global_variables_initializer())
            T_queue.put(0)
        summary = Summary(save_path, agent)
        # Create a worker thread for each training environment
        for i in range(num_threads):
            processes.append(threading.Thread(target=async_trainer, args=(agent,
                envs[i], sess, i, T_queue, summary, saver, save_path,)))
        # Create a thread to evaluate the agent
        processes.append(threading.Thread(target=evaluator, args=(agent,
            evaluation_env, sess, T_queue, summary, saver, save_path,)))
        # Start all the threads
        for p in processes:
            p.daemon = True
            p.start()
        # Until training is finished
        while not training_finished:
            sleep(0.01)
        # Join the threads, so we get this thread back.
        for p in processes:
            p.join()
# Returns sum(rewards[i] * gamma**i)
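# The comment above describes a discounted return. A minimal sketch of such a
# helper (an illustration of the formula, not the original implementation):
def discounted_return(rewards, gamma):
    """Return sum(rewards[i] * gamma**i) for a sequence of per-step rewards."""
    return sum(r * gamma ** i for i, r in enumerate(rewards))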