def sample_discrete_from_extero(self, i):
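    """Sample a new discrete proprioceptive goal from an exteroceptive goal.

    Refit the extero-to-proprio mixture model on the data seen so far, then,
    at the regular goal sampling interval or when the current exteroceptive
    goal has not been reached after a short grace period, pick an already
    visited (reachable) exteroceptive state as the new goal and sample the
    matching proprioceptive goal from the mixture model.
    """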
    # refit the extero-to-proprio mixture model on the data gathered so far
    self.mm.fit(self.S_ext, self.M_prop_pred)
    # L1 distance between the current exteroceptive goal and the measured exteroceptive state
    ext_err = np.sum(np.abs(self.goal_ext - self.S_ext))
    # resample the goal at the regular interval, or early if the current goal
    # has not been reached within five steps of the last resampling
    if i % self.goal_sample_interval == 0 or \
       ((i - self.goal_sample_time) > 5 and ext_err > 0.1):
        # update e2p: stack the exteroceptive inputs and proprioceptive targets
        # of the e2p model into one joint array
        EP = np.hstack((np.asarray(self.e2p.X_), np.asarray(self.e2p.y_)))
        # print "EP[%d] = %s" % (i, EP)
        EP = EP[10:] # knn bootstrapping creates additional datapoints, drop them
        # if i % 100 == 0:
        #     # re-fit gmm e2p
        #     self.mm.fit(np.asarray(self.e2p.X_)[10:], np.asarray(self.e2p.y_)[10:])
# print "EP, cen_lst, cov_lst, p_k, logL", EP, self.cen_lst, self.cov_lst, self.p_k, self.logL
ref_interval = 1
self.cond = EP[(i+ref_interval) % EP.shape[0]] # X_[i,:3]
self.cond[2:] = np.nan
self.cond_ = np.random.uniform(-1, 1, (5, ))
# randomly fetch an exteroceptive state that we have seen already (= reachable)
self.goal_ext = EP[np.random.choice(range(self.numsteps/2)),:2].reshape((1, self.dim_ext))
        # self.cond_[:2] = self.goal_ext
        # self.cond_[2:] = np.nan
        # print "self.cond", self.cond
        # print "self.cond_", self.cond_
        # predict the proprioceptive goal from the exteroceptive one
        # (see _sketch_conditional_gmm_goal_sampling() below for what this
        # conditional sampling step amounts to)
        # if hasattr(self.mm, "cen_lst"):
        #     self.goal_prop = self.mm.sample(self.cond_)
        # else:
        #     self.goal_prop = self.mm.sample(self.goal_ext)
        self.goal_prop = self.mm.sample(self.goal_ext)
        # remember when the goal was last resampled
        self.goal_sample_time = i
        # (cen_con, cov_con, new_p_k) = gmm.cond_dist(self.cond_, self.cen_lst, self.cov_lst, self.p_k)
        # self.goal_prop = gmm.sample_gaussian_mixture(cen_con, cov_con, new_p_k, samples = 1)
        # # discrete goal
        # self.goal_prop = np.random.uniform(self.environment.conf.m_mins, self.environment.conf.m_maxs, (1, self.odim))
        print "new goal_prop[%d] = %s" % (i, self.goal_prop)
        print " goal_ext[%d] = %s" % (i, self.goal_ext)
        print "e_pred = %f" % (np.linalg.norm(self.E_prop_pred, 2))
        print "ext_er = %f" % (ext_err)
# def lh_sample_error_gradient(self, i):
#     """sample the local error gradient"""
#     # hook: goal sampling
#     if i % self.goal_sample_interval == 0:
#         self.goal_prop = np.random.uniform(self.environment.conf.m_mins * 0.95, self.environment.conf.m_maxs * 0.95, (1, self.odim))
#         print "new goal[%d] = %s" % (i, self.goal_prop)
#         print "e_pred = %f" % (np.linalg.norm(self.E_prop_pred, 2))