def __init__(self, thread_id):
print "create worker %d"%(thread_id)
self.thread_id = thread_id
self.env = env = Environment(gym.make(pms.environment_name))
# print("Observation Space", env.observation_space)
# print("Action Space", env.action_space)
# print("Action area, high:%f, low%f" % (env.action_space.high, env.action_space.low))
self.end_count = 0
self.paths = []
self.train = True
self.baseline = Baseline()
self.storage = Storage(self, self.env, self.baseline)
self.distribution = DiagonalGaussian(pms.action_shape)
self.session = self.master.session
self.init_network()
Example source code for the Python Environment() class
def reset(self):
"""
Resets the Environment to the initial state
"""
## Initialize the state to be the middle
## value for each parameter, e.g. if there are 13 and 19
## buckets for the arm and hand parameters, then the initial
## state should be (6,9)
##
## Also call self.crawlingRobot.setAngles()
## with the initial arm and hand angles
armState = self.nArmStates // 2
handState = self.nHandStates // 2
self.state = armState,handState
self.crawlingRobot.setAngles(self.armBuckets[armState],self.handBuckets[handState])
self.crawlingRobot.positions = [20,self.crawlingRobot.getRobotPosition()[0]]
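A quick standalone check of the docstring's example above; 13 and 19 are just the sample bucket counts from the comment, not values taken from the project:
nArmStates, nHandStates = 13, 19
armState, handState = nArmStates // 2, nHandStates // 2
assert (armState, handState) == (6, 9)  # the documented initial state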
def __init__(self,location,shape,plot):
self.location = location
self.shape = shape
self.plot = plot
self.env = Environment(self.shape)
self.agent = Agent()
self.child = Child(self.env.state)
def get_bash_script(script_dir, is_simulation = True, is_automated=False, is_testing=False):
"""
Reads a README.md file in the indicated directory and builds an
executable bash script from the commands contained within.
"""
if not script_dir.endswith('/'):
script_dir = script_dir + "/"
script = ""
env = Environment(script_dir, False).get()
for key, value in env.items():
script += key + "='" + value + "'\n"
filename = env.get_script_file_name(script_dir)
in_code_block = False
in_results_section = False
lines = list(open(script_dir + filename))
for line in lines:
if line.startswith("Results:"):
# Entering results section
in_results_section = True
elif line.startswith("```") and not in_code_block:
# Entering a code block, if in_results_section = True then it's a results block
in_code_block = True
elif line.startswith("```") and in_code_block:
# Finishing code block
in_results_section = False
in_code_block = False
elif in_code_block and not in_results_section:
# Executable line
script += line
elif line.startswith("#") and not in_code_block and not in_results_section and not is_automated:
# Heading in descriptive text
script += "\n"
return script
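A hypothetical usage sketch for get_bash_script; the demo directory and output filename below are placeholders, not paths from the original project:
# Sketch only: build a runnable shell script from a README.md demo directory.
if __name__ == "__main__":
    bash_script = get_bash_script("demo_scripts/my_demo", is_simulation=False)
    with open("run_demo.sh", "w") as script_file:
        script_file.write("#!/usr/bin/env bash\n")
        script_file.write(bash_script)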
def __init__(self, is_running_in_docker, script_dir="demo_scripts", filename="README.md", is_simulation=True, is_automated=False, is_testing=False, is_fast_fail=True, is_learning=False, parent_script_dir=None, is_prep_only=False, is_prerequisite=False, output_format="log"):
"""
is_running_in_docker should be set to true if we are running inside a Docker container
script_dir is the location to look for scripts
filename is the filename of the script this demo represents
is_simulation should be set to true if we want to simulate a human running the commands
is_automated should be set to true if we don't want to wait for an operator to indicate it's time to execute the next command
is_testing is set to true if we want to compare actual results with expected results; by default execution will stop if any test fails (see is_fast_fail)
is_fast_fail should be set to false if we want to continue running tests even after a failure
is_learning should be set to true if we want a human to type in the commands
parent_script_dir should be the directory of the script that calls this one, or None if this is the root script
is_prep_only should be set to true if we want to stop execution after all prerequisites are satisfied
is_prerequisite indicates whether this is a prerequisite or not. It is used to decide behaviour with respect to simulation etc.
"""
self.mode = None
self.is_docker = is_running_in_docker
self.filename = filename
self.script_dir = ""
self.set_script_dir(script_dir)
self.is_simulation = is_simulation
self.is_automated = is_automated
self.is_testing = is_testing
self.is_fast_fail = is_fast_fail
self.is_learning = is_learning
self.current_command = ""
self.current_description = ""
self.last_command = ""
self.is_prep_only = is_prep_only
self.parent_script_dir = parent_script_dir
if self.parent_script_dir:
self.env = Environment(self.parent_script_dir, is_test=self.is_testing)
else:
self.env = Environment(self.script_dir, is_test=self.is_testing)
self.is_prerequisite = is_prerequisite
self.output_format = output_format
self.all_results = []
self.completed_validation_steps = []
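A hypothetical construction call using only the parameters documented in the docstring above; the enclosing class name (Demo) is a guess, not something the snippet states:
# Sketch only: Demo is an assumed name for the class that owns this __init__.
demo = Demo(
    is_running_in_docker=False,
    script_dir="demo_scripts",
    filename="README.md",
    is_simulation=True,
    is_automated=True,
    is_testing=False,
)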
def make_environment(name, **args):
"""Make an environment of a given name, possibly using extra arguments."""
env = environment_registry.get(name, Environment)
if name in environment_registry:
env = env(**args)
else:
env = env(name, **args)
if "atari.atari_env" in env.unwrapped.__module__:
to_dict = env.to_dict
env = Vectorize(env)
env = AtariRescale42x42(env)
env = Unvectorize(env)
env.to_dict = to_dict
return env
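An illustrative call; the environment name is arbitrary, and environment_registry is the module-level registry the function already reads from:
# Sketch only: an unregistered name falls through to Environment(name, **args);
# Atari environments additionally get the Vectorize/AtariRescale42x42/Unvectorize wrappers.
env = make_environment("CartPole-v0")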
def run():
"""Run the agent for a finite number of trials."""
# Set up environment and agent
e = Environment() # create environment (also adds some dummy traffic)
a = e.create_agent(LearningAgent) # create agent
#e.set_primary_agent(a, enforce_deadline=False)
e.set_primary_agent(a, enforce_deadline=True) # set agent to track
# Now simulate it
sim = Simulator(e, update_delay=1.0) # reduce update_delay to speed up simulation
sim.run(n_trials=10) # press Esc or close pygame window to quit
def __init__(self, thread_id, master):
print "create thread %d"%(thread_id)
self.thread_id = thread_id
threading.Thread.__init__(self, name="thread_%d" % thread_id)
self.master = master
self.env = env = Environment(gym.make(pms.environment_name))
TRPOAgentBase.__init__(self, env)
self.session = self.master.session
self.init_network()
self.saver = tf.train.Saver(max_to_keep=10)
def run():
"""Run the agent for a finite number of trials."""
# Set up environment and agent
e = Environment() # create environment (also adds some dummy traffic)
a = e.create_agent(LearningAgent) # create agent
e.set_primary_agent(a, enforce_deadline=False) # specify agent to track
# NOTE: You can set enforce_deadline=False while debugging to allow longer trials
# Now simulate it
sim = Simulator(e, update_delay=0.001, display=True) # create simulator (uses pygame when display=True, if available)
# NOTE: To speed up simulation, reduce update_delay and/or set display=False
sim.run(n_trials=1) # run for a specified number of trials
# NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
def main():
env = None
try:
ssid = sys.argv[1]
device = sys.argv[2]
env = Environment(device, ssid, v=True)
except IndexError:
print('Proper usage: python ddrone.py {ssid} {wireless device}')
if env:
env.crackdrone(0)
def test_network(self, reward_type):
"""
Network testing is done on the validation data; the testing data is used to see whether the representation
has been learnt efficiently. The testing data is not meant to check RL accuracy (the validation data is for that);
it is meant to check whether the algorithm has learnt feature representations of the objects.
:return: None
"""
imagenet = self.__get_image_loader(NUM_VALIDATION_IMAGES)
imagenet.load_next_image()
state = self.__get_initial_state()
# initialize the environment
env = Environment(imagenet.get_image(), state, self.grid_dim, imagenet.get_puzzle_pieces(),
IMAGE_HEIGHT, WINDOW_SIZE, SLIDING_STRIDE,
self.input_channels, self.state_type)
reward_list = []
image_diff_list = []
episode_reward = 0.0
while True:
state_new, a_t, reward, terminal = self.__play_one_move(state, env, reward_type, epsilon=0.0)
episode_reward = reward + GAMMA * episode_reward
# if terminal state has reached, then move to the next image
if terminal:
image_diff_list.append(env.get_normalized_image_diff())
reward_list.append(episode_reward)
image_present = imagenet.load_next_image()
if image_present:
env.update_puzzle_pieces(imagenet.get_puzzle_pieces())
env.update_original_image(imagenet.get_image())
episode_reward = 0.0
else:
break
# update the old values
state = env.get_state()
# display the reward and image matching performance statistics
performance_statistics(image_diff_list, reward_list)
def run():
""" Driving function for running the simulation.
Press ESC to close the simulation, or [SPACE] to pause the simulation. """
##############
# Create the environment
# Flags:
# verbose - set to True to display additional output from the simulation
# num_dummies - discrete number of dummy agents in the environment, default is 100
# grid_size - discrete number of intersections (columns, rows), default is (8, 6)
env = Environment()
##############
# Create the driving agent
# Flags:
# learning - set to True to force the driving agent to use Q-learning
# * epsilon - continuous value for the exploration factor, default is 1
# * alpha - continuous value for the learning rate, default is 0.5
agent = env.create_agent(LearningAgent)
##############
# Follow the driving agent
# Flags:
# enforce_deadline - set to True to enforce a deadline metric
env.set_primary_agent(agent)
##############
# Create the simulation
# Flags:
# update_delay - continuous time (in seconds) between actions, default is 2.0 seconds
# display - set to False to disable the GUI if PyGame is enabled
# log_metrics - set to True to log trial and simulation results to /logs
# optimized - set to True to change the default log file name
sim = Simulator(env)
##############
# Run the simulator
# Flags:
# tolerance - epsilon tolerance before beginning testing, default is 0.05
# n_test - discrete number of testing trials to perform, default is 0
sim.run()
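The same driver with the documented flags written out explicitly; every keyword below comes from the flag comments above, the values are the stated defaults, and n_test is an arbitrary illustrative choice:
# Sketch only: explicit flags, mirroring the comments in run().
env = Environment(verbose=True, num_dummies=100, grid_size=(8, 6))
agent = env.create_agent(LearningAgent, learning=True, epsilon=1.0, alpha=0.5)
env.set_primary_agent(agent, enforce_deadline=True)
sim = Simulator(env, update_delay=2.0, display=False, log_metrics=True, optimized=True)
sim.run(tolerance=0.05, n_test=10)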
def doAction(self, action):
"""
Perform the action and update
the current state of the Environment
and return the reward for the
current state, the next state
and the taken action.
Returns:
nextState, reward
"""
nextState, reward = None, None
oldX,oldY = self.crawlingRobot.getRobotPosition()
armBucket,handBucket = self.state
armAngle,handAngle = self.crawlingRobot.getAngles()
if action == 'arm-up':
newArmAngle = self.armBuckets[armBucket+1]
self.crawlingRobot.moveArm(newArmAngle)
nextState = (armBucket+1,handBucket)
if action == 'arm-down':
newArmAngle = self.armBuckets[armBucket-1]
self.crawlingRobot.moveArm(newArmAngle)
nextState = (armBucket-1,handBucket)
if action == 'hand-up':
newHandAngle = self.handBuckets[handBucket+1]
self.crawlingRobot.moveHand(newHandAngle)
nextState = (armBucket,handBucket+1)
if action == 'hand-down':
newHandAngle = self.handBuckets[handBucket-1]
self.crawlingRobot.moveHand(newHandAngle)
nextState = (armBucket,handBucket-1)
newX,newY = self.crawlingRobot.getRobotPosition()
# a simple reward function
reward = newX - oldX
self.state = nextState
return nextState, reward
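An illustrative call against the contract described in the docstring; the env instance name is hypothetical, and 'arm-up' is one of the action strings handled above:
# Sketch only: returns the new (armBucket, handBucket) pair and the change in x position as the reward.
nextState, reward = env.doAction('arm-up')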
def run():
"""Run the agent for a finite number of trials."""
# Set up environment and agent
e = Environment() # create environment (also adds some dummy traffic)
a = e.create_agent(LearningAgent) # create agent
e.set_primary_agent(a, enforce_deadline=True) # specify agent to track
# NOTE: You can set enforce_deadline=False while debugging to allow longer trials
# Now simulate it
sim = Simulator(e, update_delay=0.005, display=False) # create simulator (uses pygame when display=True, if available)
# NOTE: To speed up simulation, reduce update_delay and/or set display=False
sim.run(n_trials=100) # run for a specified number of trials
# NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
def apply_function(self, function, args, env):
if datatypes.isLambda(function):
local_env = environment.Environment(
{},
outer=function.env,
bindings=function.args,
args=args)
return self.eval(function.body, local_env)
return function(*args)
def main(argv):
# Pretrained network to use
inputfile = None
# Wether to train or to test
train = False
# Trained network
outputfile = None
try:
opts, args = getopt.getopt(argv,"hrl:s:",["loadckpt=","saveckpt="])
except getopt.GetoptError:
print('Incorrect usage. For more information: test.py -h')
sys.exit(2)
for opt, arg in opts:
if opt == '-h':
print('python test.py -r -l <ckptfile> -s <ckptfile>')
print('-r for enabling training')
print('-l for loading pre-existing model')
print('-s for saving model to file')
sys.exit()
elif opt == '-r':
train = True
elif opt in ("-l", "--loadckpt"):
inputfile = arg
elif opt in ("-s", "--saveckpt"):
outputfile = arg
with tf.Session() as sess:
env = Environment()
agent = DQNAgent(env, sess, inputfile)
if train:
agent.train(6000000, outputfile)
else:
agent.test(2000)
def run():
"""Run the agent for a finite number of trials."""
# Set up environment and agent
e = Environment() # create environment (also adds some dummy traffic)
a = e.create_agent(LearningAgent) # create agent
e.set_primary_agent(a, enforce_deadline=True) # set agent to track
# Now simulate it
sim = Simulator(e, update_delay=1.0) # reduce update_delay to speed up simulation
sim.run(n_trials=200) # press Esc or close pygame window to quit
def generate_Environment():
env = environment.Environment(260, 260)
env.genTerrain(1)
# env.saveElevationData()
# env.loadElevationData()
env.plotElevation()
env.buildVisibilityMaps()
# env.saveVisibilityMaps()
# env.loadVisibilityMaps()
return env
def plot_Visible_Area(force):
plt.figure()
c = ['r', 'b']
a = 0
for F in force:
plt.plot(F.visible_area, c=c[a])
a += 1
plt.xlabel('Timestep')
plt.ylabel('Visible Area')
plt.title('Fraction of Environment Visible')
def main(_):
ps_hosts = FLAGS.ps_hosts.split(',')
worker_hosts = FLAGS.worker_hosts.split(',')
# Create a cluster from the parameter server and worker hosts.
cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
# Create and start a server for the local task.
# Each task starts its own server; job_name and task_index identify it within the cluster.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1 / 3.0)
server = tf.train.Server(cluster,
job_name=FLAGS.job_name,
task_index=FLAGS.task_index,
config=tf.ConfigProto(gpu_options=gpu_options))
if FLAGS.job_name == "ps":
server.join()
elif FLAGS.job_name == "worker":
# Pin this worker's ops to its own task via replica_device_setter.
env = Environment(gym.make(pms.environment_name))
with tf.device(tf.train.replica_device_setter(
worker_device="/job:worker/task:%d" % (FLAGS.task_index),
cluster=cluster)):
agent = TRPOAgentParallel(env)
saver = tf.train.Saver(max_to_keep=10)
init_op = tf.initialize_all_variables()
summary_op = tf.merge_all_summaries()
# Create a "supervisor", which oversees the training process.
sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
logdir="./checkpoint_parallel",
init_op=init_op,
global_step=agent.global_step,
saver=saver,
summary_op=None,
save_model_secs=60)
# The supervisor takes care of session initialization, restoring from
# a checkpoint, and closing when done or an error occurs.
with sv.managed_session(server.target) as sess:
agent.session = sess
agent.gf.session = sess
agent.sff.session = sess
agent.supervisor = sv
if pms.train_flag:
agent.learn()
elif FLAGS.task_index == 0:
agent.test(pms.checkpoint_file)
# Ask for all the services to stop.
sv.stop()
def run():
"""Run the agent for a finite number of trials."""
for simulation in range(0, N_SIMULATIONS):
# Set up environment and agent
e = Environment() # create environment (also adds some dummy traffic)
a = e.create_agent(LearningAgent) # create agent
e.set_primary_agent(a, enforce_deadline=True) # specify agent to track
# NOTE: You can set enforce_deadline=False while debugging to allow longer trials
# TODO: Change later enforce_deadline=True
# Now simulate it
sim = Simulator(e, update_delay=0.001, display=False) # create simulator (uses pygame when display=True, if available)
# NOTE: To speed up simulation, reduce update_delay and/or set display=False
sim.run(n_trials=N_TRIALS) # run for a specified number of trials
# NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
if simulation == N_SIMULATIONS - 1:
with open('results.csv', 'a') as csvfile:
fieldnames = ['alpha', 'gamma', 'epsilon', 'success_rate', 'last_failure']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
for index in range(0,len(simulation_rates)):
writer.writerow({
'alpha': get_simulation_params(0)[0],
'gamma': get_simulation_params(0)[1],
'epsilon': get_simulation_params(0)[2],
'success_rate': simulation_rates[index],
'last_failure': last_errors[index]})
if N_SIMULATIONS > 1: # multiple simulations, and this is the last one
plt.figure(1)
plt.subplot(211)
plt.plot(simulation_rates)
plt.title('Success Rate/Simulation')
plt.xlabel('# Simulation')
plt.ylabel('Success Rate')
plt.subplot(212)
plt.plot(last_errors)
plt.title('Last failed trial per simulation')
plt.xlabel('# Simulation')
plt.ylabel('Last failed trial')
plt.show()