// GoldMineSAAlgorithmQLearning.java - source file, Java code snippet

/**
 * Returns the {@link QLearningStateNode} object stored for the given hashed state. If no
 * {@link QLearningStateNode} object is stored, one is created, indexed, and has its Q-values
 * initialized using this object's {@link burlap.behavior.valuefunction.ValueFunctionInitialization}
 * data member.
 * <p>
 * A newly created node receives one Q-value entry for each grounded action returned by
 * {@code getAllGroundedActions} that reports itself applicable in the state, and
 * {@code qTableSize} is incremented once per entry added.
 * <p>
 * NOTE(review): the empty-action check runs before the applicability filter, so a state whose
 * returned actions are all inapplicable is stored as a node with no Q-values rather than
 * rejected; confirm that this is intended.
 *
 * @param s the hashed state for which to get the {@link QLearningStateNode} object
 * @return the {@link QLearningStateNode} object stored for the given hashed state, newly created
 *         and indexed if none was stored before this call
 * @throws RuntimeErrorException if no grounded actions are returned for the state
 */
@Override
protected QLearningStateNode getStateNode(HashableState s){
    QLearningStateNode node = qIndex.get(s);
    if(node != null){
        // Already indexed: nothing to initialize.
        return node;
    }

    node = new QLearningStateNode(s);
    List<GroundedAction> gas = this.getAllGroundedActions(s.s);
    if(gas.isEmpty()){
        // Exception type and message are kept as-is so existing callers that catch it still work.
        throw new RuntimeErrorException(new Error("No possible actions in this state, cannot continue Q-learning"));
    }

    for(GroundedAction ga : gas){
        // Only actions applicable in this state get a Q-table entry.
        if(ga.applicableInState(s.s)){
            node.addQValue(ga, qInitFunction.qValue(s.s, ga));
            this.qTableSize++;
        }
    }

    // The node is indexed only after its Q-values are populated.
    qIndex.put(s, node);
    return node;
}