sorting_task.py 文件源码-python代码片段

def reward(sample_solution, USE_CUDA=False):
    """
    The reward for the sorting task is defined as the
    length of the longest sorted consecutive subsequence.

    Input sequences must all be the same length.

    Example: 

    input       | output
    ====================
    [1 4 3 5 2] | [5 1 2 3 4]

    The output gets a reward of 4/5, or 0.8

    The range is [1/sourceL, 1]

    Args:
        sample_solution: list of len sourceL of [batch_size]
        Tensors
    Returns:
        [batch_size] containing trajectory rewards
    """
    batch_size = sample_solution[0].size(0)
    sourceL = len(sample_solution)

    longest = Variable(torch.ones(batch_size), requires_grad=False)
    current = Variable(torch.ones(batch_size), requires_grad=False)

    if USE_CUDA:
        longest = longest.cuda()
        current = current.cuda()

    for i in range(1, sourceL):
        # compare solution[i-1] < solution[i] 
        res = torch.lt(sample_solution[i-1], sample_solution[i]) 
        # if res[i,j] == 1, increment length of current sorted subsequence
        current += res.float()  
        # else, reset current to 1
        current[torch.eq(res, 0)] = 1
        #current[torch.eq(res, 0)] -= 1
        # if, for any, current > longest, update longest
        mask = torch.gt(current, longest)
        longest[mask] = current[mask]
    return -torch.div(longest, sourceL)