# cartpole.py -- source file (Python code snippet)

def feature_extractor_2(state, action, normalize=True):
    """Build the per-action polynomial/trigonometric feature vector.

    With ``d = len(state)``, the state-only features are, in order:

    - the lower triangle (row-major) of ``outer([1, state], [1, state])``,
      i.e. the bias ``1``, the ``d`` state elements and all
      ``d (d + 1) / 2`` second-order products (pure and mixed):
      ``(d + 1) (d + 2) / 2`` values in total, bias first;
    - the sine of each of those values except the bias;
    - the cosine of each of those values except the bias.

    That gives ``m = 3 (d + 1) (d + 2) / 2 - 2`` state features. The
    returned vector has length ``2 m``: slots ``[0, m)`` belong to action 0
    and slots ``[m, 2 m)`` to action 1. Only the block of the selected
    action is populated; the other block is zero.

    Args:
        state: 1-D sequence of state values.
        action: the selected action, compared against 0 and 1. Any other
            value selects neither block, so the result is all zeros.
        normalize: when true, divide the whole vector by its Euclidean norm
            (the bias contributes to that norm) and then overwrite the two
            bias slots with 1.0 for the selected action and 0.0 for the
            other one.

    Returns:
        1-D NumPy array of length ``2 m``.
    """
    # [1, state]: d + 1 entries, the leading 1 produces the bias and the
    # first-order terms when multiplied with the rest.
    augmented = np.r_[1, state]
    # Lower triangle of the outer product: (d + 1)(d + 2) / 2 entries.
    lower = np.tril_indices(augmented.shape[0])
    quadratic = np.outer(augmented, augmented)[lower].reshape(-1)
    # Skip the bias (index 0): (d + 1)(d + 2) / 2 - 1 entries each.
    non_bias = quadratic[1:]
    sines = np.sin(non_bias)
    cosines = np.cos(non_bias)
    # m = 3 (d + 1)(d + 2) / 2 - 2 entries.
    state_feats = np.r_[quadratic, sines, cosines]
    block_size = state_feats.shape[0]
    # One copy of the state features per action, zeroed for the action that
    # was not selected; flattened as [action-0 block, action-1 block].
    action_mask = np.array([0, 1]) == action
    features = np.outer(state_feats, action_mask).T.reshape(-1)
    if not normalize:
        return features

    scale = np.linalg.norm(features)
    if scale == 0:
        # Happens only when ``action`` matches neither 0 nor 1 (the bias
        # keeps the norm >= 1 otherwise). Dividing would fill the vector
        # with NaNs, so keep the all-zero features instead.
        normalized = features.copy()
    else:
        normalized = features / scale
    # Restore the bias slots, which the division above rescaled.
    normalized[0] = int(action == 0) * 1.0
    normalized[block_size] = int(action == 1) * 1.0
    return normalized