def well_formed(self):
    return (
        isinstance(self.H, Agent) and
        isinstance(self.child_base, BudgetedHCH) and
        areinstances(self.args, Referent)
    )
def well_formed(self):
    return (
        isinstance(self.transcript, tuple) and
        all(hashable(x) for x in self.transcript) and
        isinstance(self.transcript_hash, six.string_types) and
        isinstance(self.agent, Agent) and
        self.agent.state_free
    )
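# Note: areinstances() and hashable() are helpers used by the two well_formed()
# methods above but are not shown on this page. A minimal sketch of what they
# might look like (an assumption, not the original implementations):
def areinstances(items, cls):
    """Return True if every element of items is an instance of cls."""
    return all(isinstance(x, cls) for x in items)


def hashable(x):
    """Return True if x can be hashed (i.e. used in sets or as a dict key)."""
    try:
        hash(x)
        return True
    except TypeError:
        return False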
def run_agent(save_path, T, game_name):
    with tf.Session() as sess:
        agent = Agent(session=sess, observation_shape=(210, 160, 3),
                      action_size=3,
                      optimizer=tf.train.AdamOptimizer(1e-4))
        # Create a saver and restore the checkpoint saved at step T.
        saver = tf.train.Saver()
        saver.restore(sess, save_path + '-' + str(T))
        play(agent, game_name)
        return sess, agent
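# Note: the '<save_path>-<T>' prefix restored above is the naming convention
# tf.train.Saver uses when a checkpoint is written with a global step, e.g.
# saver.save(sess, save_path, global_step=T) on the training side (assumed
# here, not shown in this snippet).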
def main():
    """Main function.

    Test the agent from a saved checkpoint.
    """
    simulator_config = 'config/simulator.json'
    print('Starting simulator...')
    simulator = DoomSimulator(simulator_config)
    simulator.add_bots(10)
    print('Simulator started!')
    agent_config = 'config/agent.json'
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    print('Creating agent...')
    ag = Agent(sess, agent_config, simulator)
    print('Loading model...')
    loadstatus = ag.load('./checkpoints')
    if not loadstatus:
        raise IOError('Could not load a checkpoint from ./checkpoints')
    img_buffer = np.zeros((ag.history_length, simulator.num_channels, simulator.resolution[1], simulator.resolution[0]))
    measure_buffer = np.zeros((ag.history_length, simulator.num_measure))
    curr_step = 0
    term = False
    # Prefixes (first 6 action flags) in which forward/backward cancel out,
    # left/right cancel out and ATTACK is off; these "do nothing" actions are
    # swapped for replacement_act below.
    acts_to_replace = [a + b + c + d for a in [[0, 0], [1, 1]]
                       for b in [[0, 0], [1, 1]] for c in [[0]] for d in [[0], [1]]]
    replacement_act = [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
    # Action 0-5: MOVE_FORWARD MOVE_BACKWARD TURN_LEFT TURN_RIGHT ATTACK SPEED
    # Action 6-11: SELECT_WEAPON2 ~ SELECT_WEAPON7
    while not term:
        if curr_step < ag.history_length:
            img, meas, reward, term = simulator.step(np.squeeze(ag.random_actions(1)).tolist())
        else:
            state_imgs = img_buffer[np.arange(curr_step - ag.history_length, curr_step) % ag.history_length]
            state_imgs = np.reshape(state_imgs, (1,) + ag.get_img_shape())
            state_imgs = np.transpose(state_imgs, [0, 2, 3, 1])
            state_meas = measure_buffer[np.arange(curr_step - ag.history_length, curr_step) % ag.history_length]
            state_meas = np.reshape(state_meas, (1, ag.history_length * simulator.num_measure))
            curr_act = np.squeeze(ag.act(state_imgs, state_meas, ag.test_objective_params)[0]).tolist()
            if curr_act[:6] in acts_to_replace:
                curr_act = replacement_act
            img, meas, reward, term = simulator.step(curr_act)
        if meas is not None and meas[0] > 30.:
            meas[0] = 30.
        simulator.show_info()
        sleep(0.02)
        if not term:
            img_buffer[curr_step % ag.history_length] = img
            measure_buffer[curr_step % ag.history_length] = meas
        curr_step += 1
    simulator.close_game()
def validate(self, agent_instance):
    """ Validate an agent instance and update its code and error_message
    if the agent instance is not valid.

    Args:
        agent_instance (:obj:`apiai_assistant.agent.Agent`): agent instance

    Returns:
        bool: True if valid, False otherwise.
    """
    if not agent_instance.parser:
        agent_instance.error('Could not instantiate parser',
                             code=agent.Status.InvalidData)
        return False

    if not agent_instance.parser.is_valid:
        agent_instance.error('Could not validate data',
                             code=agent.Status.InvalidData)
        return False

    logging.debug("""
    - Actions: {actions}
    - Action: {action}""".format(
        actions=self.action_map.keys(),
        action=agent_instance.parser.action))

    if (not agent_instance.parser.action
            or agent_instance.parser.action not in self.action_map):
        agent_instance.error('Could not understand action',
                             code=agent.Status.InvalidData)
        return False

    logging.debug("""
    - HTTP Request: {data}
    - API.AI Request: {request}
    - Agent: {code} {message}
    - Valid: {valid}""".format(
        data=agent_instance.parser.data,
        request=agent_instance.parser.request,
        code=agent_instance.code,
        message=agent_instance.error_message,
        valid=agent_instance.code == agent.Status.OK))

    return True
def run(sc):
    ts = datetime.datetime.now().replace(microsecond=0).isoformat('_')
    sc.sim_time_start = ts
    logger.setup(sc)
    store_scenario(sc)
    INFO('Init (%s)' % ts)
    INFO('Fitness (minimal): %.6f' % sc.sol_fitness_min)
    INFO('Fitness (maximal): %.6f' % sc.sol_fitness_max)
    INFO('Fitness (average): %.6f' % sc.sol_fitness_avg)
    INFO('Creating %d agents' % sc.opt_m)
    agents = dict()
    for aid, search_space, initial_value in zip(
            sc.agent_ids, sc.agent_search_spaces, sc.agent_initial_values):
        agents[aid] = Agent(aid, search_space, initial_value)
    INFO('Connecting agents')
    for a, neighbors in sc.network.items():
        for n in neighbors:
            # Consistency check
            assert a != n, 'cannot add myself as neighbor!'
            # Add neighbor
            DEBUG('', 'Connecting', a, '->', n)
            if n not in agents[a].neighbors:
                agents[a].neighbors[n] = agents[n]
            else:
                WARNING(n, 'is already neighbor of', a)
    INFO('Starting simulation')
    mas = Mas(sc, agents)
    logger.set_mas(mas)
    stats = Stats(sc, agents)
    stats.eval(mas.current_time)
    AGENT(mas.aid, 'Notifying initial agent (%s)' % sc.sim_initial_agent)
    kappa = Working_Memory(sc.objective, dict(),
                           Solution_Candidate(None, dict(), float('-inf')))
    msg = Message(mas.aid, sc.sim_initial_agent, kappa)
    mas.msg(msg)
    while mas.is_active():
        mas.step()
        stats.eval(mas.current_time)
    if not stats.is_converged():
        ERROR('convergence not reached!')
    ts = datetime.datetime.now().replace(microsecond=0).isoformat('_')
    INFO('End (%s)' % ts)
    # Store scenario again, this time with simulation result
    store_scenario(sc, overwrite=True)
    return stats
def run(settings):
    recreate_subdirectory_structure(settings)
    tf.reset_default_graph()
    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)
        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)
        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            if FLAGS.hypertune:
                ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"])
            else:
                ckpt = tf.train.get_checkpoint_state(settings["load_from"])
            print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())
        agent_threads = []
        for agent in agents:
            # Bind the current agent as a default argument; a bare
            # `lambda: agent.play(...)` late-binds and every thread would
            # end up running the last agent in the list.
            agent_play = lambda agent=agent: agent.play(sess, coord, saver)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
def run():
    recreate_directory_structure()
    tf.reset_default_graph()
    sess = tf.Session()
    # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
    with sess:
        with tf.device("/cpu:0"):
            global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
            optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
            # num_agents = multiprocessing.cpu_count()
            num_agents = FLAGS.nb_concurrent
            agents = []
            envs = []
            for i in range(num_agents):
                gym_env = gym.make(FLAGS.game)
                # if FLAGS.monitor:
                #     gym_env = gym.wrappers.Monitor(gym_env, FLAGS.experiments_dir + '/worker_{}'.format(i), force=True)
                if FLAGS.game not in flags.SUPPORTED_ENVS:
                    gym_env = atari_environment.AtariEnvironment(gym_env=gym_env, resized_width=FLAGS.resized_width,
                                                                 resized_height=FLAGS.resized_height,
                                                                 agent_history_length=FLAGS.agent_history_length)
                    FLAGS.nb_actions = len(gym_env.gym_actions)
                envs.append(gym_env)
            global_network = FUNNetwork('global', None)
            for i in range(num_agents):
                agents.append(Agent(envs[i], i, optimizer, global_step))
            saver = tf.train.Saver(max_to_keep=5)
            coord = tf.train.Coordinator()
            if FLAGS.resume:
                ckpt = tf.train.get_checkpoint_state(os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name))
                print("Loading Model from {}".format(ckpt.model_checkpoint_path))
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                sess.run(tf.global_variables_initializer())
            agent_threads = []
            for agent in agents:
                # Default-argument binding avoids the late-binding lambda
                # pitfall (all threads sharing the last agent in the list).
                thread = threading.Thread(target=(lambda agent=agent: agent.play(sess, coord, saver)))
                thread.start()
                agent_threads.append(thread)
            while True:
                if FLAGS.show_training:
                    for env in envs:
                        # time.sleep(1)
                        # with main_lock:
                        env.render()
            coord.join(agent_threads)
def run():
    recreate_directory_structure()
    tf.reset_default_graph()
    sess = tf.Session()
    # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
    with sess:
        with tf.device("/cpu:0"):
            global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
            optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
            if FLAGS.use_conv:
                global_network = ConvNetwork('global', None)
            else:
                global_network = ACNetwork('global', None)
            # num_agents = multiprocessing.cpu_count()
            num_agents = FLAGS.nb_concurrent
            agents = []
            envs = []
            for i in range(num_agents):
                gym_env = gym.make(FLAGS.game)
                # if FLAGS.monitor:
                #     gym_env = gym.wrappers.Monitor(gym_env, FLAGS.experiments_dir + '/worker_{}'.format(i), force=True)
                envs.append(gym_env)
            for i in range(num_agents):
                agents.append(Agent(envs[i], i, optimizer, global_step))
            saver = tf.train.Saver(max_to_keep=5)
            coord = tf.train.Coordinator()
            if FLAGS.resume:
                ckpt = tf.train.get_checkpoint_state(os.path.join(FLAGS.checkpoint_dir, FLAGS.model_name))
                print("Loading Model from {}".format(ckpt.model_checkpoint_path))
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                sess.run(tf.global_variables_initializer())
            agent_threads = []
            for agent in agents:
                # Default-argument binding avoids the late-binding lambda pitfall.
                thread = threading.Thread(target=(lambda agent=agent: agent.play(sess, coord, saver)))
                thread.start()
                agent_threads.append(thread)
            while True:
                if FLAGS.show_training:
                    for env in envs:
                        # time.sleep(1)
                        # with main_lock:
                        env.render()
            coord.join(agent_threads)
def run(settings):
    recreate_subdirectory_structure(settings)
    tf.reset_default_graph()
    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)
        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)
        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"])
            # print("Loading Model from {}".format(ckpt.model_checkpoint_path))
            try:
                saver.restore(sess, ckpt.model_checkpoint_path)
            except Exception as e:
                print(sys.exc_info()[0])
                print(e)
        else:
            sess.run(tf.global_variables_initializer())
        agent_threads = []
        for agent in agents:
            # Bind the current agent as a default argument to avoid the
            # late-binding lambda pitfall.
            agent_play = lambda agent=agent: agent.play(sess, coord, saver)
            thread = threading.Thread(target=agent_play)
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)
def a3c(game_name, num_threads=8, restore=None, save_path='model'):
    processes = []
    envs = []
    for _ in range(num_threads + 1):
        gym_env = gym.make(game_name)
        if game_name == 'CartPole-v0':
            env = CustomGymClassicControl(game_name)
        else:
            print("Assuming ATARI game and playing with pixels")
            env = CustomGym(game_name)
        envs.append(env)
    # Separate out the evaluation environment
    evaluation_env = envs[0]
    envs = envs[1:]
    with tf.Session() as sess:
        agent = Agent(session=sess,
                      action_size=envs[0].action_size, model='mnih',
                      optimizer=tf.train.AdamOptimizer(INITIAL_LEARNING_RATE))
        # Create a saver, and only keep 2 checkpoints.
        saver = tf.train.Saver(max_to_keep=2)
        T_queue = Queue.Queue()
        # Either restore the parameters or don't.
        if restore is not None:
            saver.restore(sess, save_path + '-' + str(restore))
            last_T = restore
            print("T was: {}".format(last_T))
            T_queue.put(last_T)
        else:
            sess.run(tf.global_variables_initializer())
            T_queue.put(0)
        summary = Summary(save_path, agent)
        # Create a worker thread for each training environment
        for i in range(num_threads):
            processes.append(threading.Thread(target=async_trainer, args=(agent,
                envs[i], sess, i, T_queue, summary, saver, save_path,)))
        # Create a thread to evaluate the agent
        processes.append(threading.Thread(target=evaluator, args=(agent,
            evaluation_env, sess, T_queue, summary, saver, save_path,)))
        # Start all the threads
        for p in processes:
            p.daemon = True
            p.start()
        # Until training is finished
        while not training_finished:
            sleep(0.01)
        # Join the threads, so we get this thread back.
        for p in processes:
            p.join()
# Returns sum(rewards[i] * gamma**i)
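# The comment above describes a discounted return. A minimal sketch of such a
# helper (an illustration of the formula, not the original implementation):
def discounted_return(rewards, gamma):
    """Return sum(rewards[i] * gamma**i) for a sequence of per-step rewards."""
    return sum(r * gamma ** i for i, r in enumerate(rewards))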