def reward(sample_solution, USE_CUDA=False):
    """
    Score each sequence in a batch by its longest strictly increasing run.

    The score for the sorting task is the length of the longest run of
    consecutive, strictly increasing elements, divided by the sequence
    length and then negated (presumably so callers can minimise it as a
    cost -- confirm against the training loop).
    Input sequences must all be the same length.

    Example:

    input       | output
    ====================
    [1 4 3 5 2] | [5 1 2 3 4]

    The longest sorted run of the output is [1 2 3 4], i.e. 4/5, so the
    value returned for it is -0.8.
    Returned values lie in [-1, -1/sourceL].

    Args:
        sample_solution: list of len sourceL of [batch_size] Tensors;
            entry i holds the i-th element of every sequence in the batch
        USE_CUDA: kept for backward compatibility and not consulted; the
            counters are allocated on the device of the input tensors

    Returns:
        [batch_size] Tensor containing the negated trajectory rewards

    Raises:
        IndexError: if sample_solution is empty
    """
    first = sample_solution[0]
    batch_size = first.size(0)
    sourceL = len(sample_solution)

    # Allocate on the inputs' device so the comparisons below can never mix
    # CPU and GPU tensors, whatever the caller passed for USE_CUDA.
    ones = torch.ones(batch_size, device=first.device)
    # A single element is a sorted run of length 1.
    longest = ones.clone()
    current = ones.clone()

    for previous, following in zip(sample_solution, sample_solution[1:]):
        # Strict comparison: equal neighbours break the run.
        ascending = torch.lt(previous, following)
        # Extend the run where the pair is ascending, otherwise restart at 1.
        current = torch.where(ascending, current + 1, ones)
        # Keep the best run length seen so far for every batch entry.
        longest = torch.max(longest, current)

    return -torch.div(longest, sourceL)
sorting_task.py 文件源码
python
阅读 26
收藏 0
点赞 0
评论 0
评论列表
文章目录