def reader(self):
    with open(self.filepath, 'r') as f:
        if self.has_header:
            next(f)
        for line in f:
            w, *vec = line.split()
            yield w, vec
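A minimal usage sketch (the enclosing class is not shown above; the `EmbeddingFile` name, the file path, and the constructor arguments are assumptions, with `self.filepath` and `self.has_header` presumed to be set in `__init__`):

# Hypothetical driver for the reader above.
emb = EmbeddingFile('vectors.txt', has_header=True)   # hypothetical class
for word, vec in emb.reader():
    vec = [float(x) for x in vec]   # tokens from line.split() are still strings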
def shards(data, size=25, test=False):
    """
    Generator over variables that will be involved in a costly loss computation
    such as the softmax. It yields dictionaries of the same form as the input,
    where the variables have been split into smaller shards and detached from
    the graph. It expects the consumer to backpropagate through the shards of
    the given size. After all shards are consumed, the generator takes care of
    backpropagating further from the input using the accumulated gradients.
    """
    # Inspired by www.github.com/OpenNMT/OpenNMT-py/blob/master/onmt/Loss.py
    if test:
        yield data
        return

    detached = dict(detach_vars(data))
    splits = ((key, torch.split(v, size)) for key, v in detached.items())
    keys, splits = zip(*splits)

    for split in zip(*splits):
        yield dict(zip(keys, split))  # go and accumulate some loss

    inputs, grads = [], []
    for key, var in detached.items():
        if var.grad is not None:
            inputs.append(data[key])
            grads.append(var.grad.data)

    torch.autograd.backward(inputs, grads, retain_graph=True)
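A hedged sketch of the consumption pattern the docstring describes: the caller backpropagates a loss on each detached shard, and once the loop ends the generator finishes the backward pass into the original graph (`criterion`, `logits`, and `targets` are placeholders):

# Hypothetical driver; the dictionary keys and criterion are illustrative only.
for shard in shards({'logits': logits, 'targets': targets}, size=25):
    loss = criterion(shard['logits'], shard['targets'])
    loss.backward()   # gradients accumulate on the detached shard variables
# when the generator is exhausted it calls torch.autograd.backward on the
# original (non-detached) inputs with the accumulated gradients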
def split(self, split_size, dim=0):
    r"""Splits this tensor into tensor chunks of :attr:`split_size` size.

    See :func:`torch.split`.
    """
    return torch.split(self, split_size, dim)
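For example, splitting a length-10 tensor into chunks of 3 leaves a shorter final chunk, and `dim` selects the axis to split along:

x = torch.arange(10)
chunks = x.split(3)                        # chunk sizes 3, 3, 3, 1
rows = torch.randn(6, 4).split(2, dim=0)   # three tensors of shape (2, 4)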
def forward(self, tensors: List[torch.Tensor],  # pylint: disable=arguments-differ
            mask: torch.Tensor = None) -> torch.Tensor:
    """
    Compute a weighted average of the ``tensors``. The input tensors can be any shape
    with at least two dimensions, but must all be the same shape.

    When ``do_layer_norm=True``, the ``mask`` is a required input. If the ``tensors`` are
    dimensioned ``(dim_0, ..., dim_{n-1}, dim_n)``, then the ``mask`` is dimensioned
    ``(dim_0, ..., dim_{n-1})``, as in the typical case with ``tensors`` of shape
    ``(batch_size, timesteps, dim)`` and ``mask`` of shape ``(batch_size, timesteps)``.

    When ``do_layer_norm=False`` the ``mask`` is ignored.
    """
    if len(tensors) != self.mixture_size:
        raise ConfigurationError("{} tensors were passed, but the module was initialized to "
                                 "mix {} tensors.".format(len(tensors), self.mixture_size))

    def _do_layer_norm(tensor, broadcast_mask, num_elements_not_masked):
        tensor_masked = tensor * broadcast_mask
        mean = torch.sum(tensor_masked) / num_elements_not_masked
        variance = torch.sum(((tensor_masked - mean) * broadcast_mask)**2) / num_elements_not_masked
        return (tensor - mean) / torch.sqrt(variance + 1E-12)

    normed_weights = torch.nn.functional.softmax(torch.cat([parameter for parameter
                                                            in self.scalar_parameters]), dim=0)
    normed_weights = torch.split(normed_weights, split_size=1)

    if not self.do_layer_norm:
        pieces = []
        for weight, tensor in zip(normed_weights, tensors):
            pieces.append(weight * tensor)
        return self.gamma * sum(pieces)
    else:
        mask_float = mask.float()
        broadcast_mask = mask_float.unsqueeze(-1)
        input_dim = tensors[0].size(-1)
        num_elements_not_masked = torch.sum(mask_float) * input_dim

        pieces = []
        for weight, tensor in zip(normed_weights, tensors):
            pieces.append(weight * _do_layer_norm(tensor,
                                                  broadcast_mask, num_elements_not_masked))
        return self.gamma * sum(pieces)
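A small usage sketch, assuming `mix` is an instance of this module constructed with `mixture_size=3` and `do_layer_norm=True`; the shapes follow the docstring above:

# Hypothetical call to the forward defined above.
tensors = [torch.randn(2, 5, 7) for _ in range(3)]   # (batch_size, timesteps, dim)
mask = torch.ones(2, 5)                               # (batch_size, timesteps)
combined = mix(tensors, mask)                         # weighted average, shape (2, 5, 7)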
def forward(self, buffers, transitions):
    buffers = [list(torch.split(b.squeeze(1), 1, 0))
               for b in torch.split(buffers, 1, 1)]
    stacks = [[buf[0], buf[0]] for buf in buffers]

    if hasattr(self, 'tracker'):
        self.tracker.reset_state()
    else:
        assert transitions is not None

    if transitions is not None:
        num_transitions = transitions.size(0)
        # trans_loss, trans_acc = 0, 0
    else:
        num_transitions = len(buffers[0]) * 2 - 3

    for i in range(num_transitions):
        if transitions is not None:
            trans = transitions[i]
        if hasattr(self, 'tracker'):
            tracker_states, trans_hyp = self.tracker(buffers, stacks)
            if trans_hyp is not None:
                trans = trans_hyp.max(1)[1]
                # if transitions is not None:
                #     trans_loss += F.cross_entropy(trans_hyp, trans)
                #     trans_acc += (trans_preds.data == trans.data).mean()
                # else:
                #     trans = trans_preds
        else:
            tracker_states = itertools.repeat(None)

        lefts, rights, trackings = [], [], []
        batch = zip(trans.data, buffers, stacks, tracker_states)
        for transition, buf, stack, tracking in batch:
            if transition == 3:  # shift
                stack.append(buf.pop())
            elif transition == 2:  # reduce
                rights.append(stack.pop())
                lefts.append(stack.pop())
                trackings.append(tracking)

        if rights:
            reduced = iter(self.reduce(lefts, rights, trackings))
            for transition, stack in zip(trans.data, stacks):
                if transition == 2:
                    stack.append(next(reduced))

    # if trans_loss is not 0:
    return bundle([stack.pop() for stack in stacks])[0]
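The first two lines of this `forward` use `torch.split` to turn one batched buffer tensor into per-example token lists; a standalone illustration of that reshaping (the sizes are arbitrary):

buffers = torch.randn(6, 4, 128)                  # (seq_len, batch_size, dim)
per_example = [list(torch.split(b.squeeze(1), 1, 0))
               for b in torch.split(buffers, 1, 1)]
# 4 examples, each a list of 6 tokens, each token a (1, 128) tensor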
def alpha_loss(outputs, targets, generator, crit, max_generator_batches, rewards, proposed_weights, tau, alpha, eval=False):
    """Loss function of the proposed method.

    :param outputs: seq_len x batch_size x logits_size
    :param targets: seq_len x batch_size
    :param generator:
    :param crit:
    :param max_generator_batches:
    :param eval:
    :return:
    """
    # compute generations one piece at a time
    num_correct, loss = 0, 0
    outputs = Variable(outputs.data, requires_grad=(not eval), volatile=eval)  # seq_len x batch_size x logits_size

    batch_size = outputs.size(1)
    outputs_split = torch.split(outputs, max_generator_batches)
    targets_split = torch.split(targets, max_generator_batches)

    # TODO(sotetsuk): fix to calculate at once
    importance_list = []
    p_sample_efficiency_list = []
    q_sample_efficiency_list = []
    pq_sample_efficiency_list = []
    for i, (out_t, targ_t) in enumerate(zip(outputs_split, targets_split)):
        out_t = out_t.view(-1, out_t.size(2))  # seq_len * batch_size x logits_size
        scores_t = generator(out_t)  # seq_len * batch_size x voc_size

        proposed_weights = torch.FloatTensor(proposed_weights)
        log_q_weights = torch.FloatTensor(rewards) / tau
        loss_t, importance_t, p_sample_efficiency_t, q_sample_efficiency_t, pq_sample_efficiency_t = crit(scores_t, targ_t.view(-1), proposed_weights, log_q_weights, alpha, rewards)  # scalar (1-d)

        pred_t = scores_t.max(1)[1]  # seq_len * batch_size x 1
        num_correct_t = pred_t.data.eq(targ_t.data).masked_select(targ_t.ne(Constants.PAD).data).sum()
        num_correct += num_correct_t
        loss += loss_t.data[0]
        importance_list += importance_t
        p_sample_efficiency_list += p_sample_efficiency_t
        q_sample_efficiency_list += q_sample_efficiency_t
        pq_sample_efficiency_list += pq_sample_efficiency_t

        if not eval:
            loss_t.div(batch_size).backward()

    grad_output = None if outputs.grad is None else outputs.grad.data
    return loss, grad_output, num_correct, importance_list, p_sample_efficiency_list, q_sample_efficiency_list, pq_sample_efficiency_list
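The `torch.split` calls above chunk along the leading (time) dimension, so with `max_generator_batches=20` a 50-step output is processed in pieces of 20, 20, and 10:

outputs = torch.randn(50, 32, 512)       # seq_len x batch_size x logits_size
for out_t in torch.split(outputs, 20):   # default dim=0
    print(out_t.size(0))                 # 20, 20, 10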
def shards(state, shard_size, eval=False):
    """
    Args:
        state: A dictionary which corresponds to the output of
               *LossCompute.make_shard_state(). The values for
               those keys are Tensor-like or None.
        shard_size: The maximum size of the shards yielded by the model.
        eval: If True, only yield the state, nothing else.
              Otherwise, yield shards.

    Yields:
        Each yielded shard is a dict.

    Side effect:
        After the last shard, this function does back-propagation.
    """
    if eval:
        yield state
    else:
        # non_none: the subdict of the state dictionary where the values
        # are not None.
        non_none = dict(filter_shard_state(state))

        # Now, the iteration:
        # state is a dictionary of sequences of tensor-like but we
        # want a sequence of dictionaries of tensors.
        # First, unzip the dictionary into a sequence of keys and a
        # sequence of tensor-like sequences.
        keys, values = zip(*((k, torch.split(v, shard_size))
                             for k, v in non_none.items()))

        # Now, yield a dictionary for each shard. The keys are always
        # the same. values is a sequence of length #keys where each
        # element is a sequence of length #shards. We want to iterate
        # over the shards, not over the keys: therefore, the values need
        # to be re-zipped by shard and then each shard can be paired
        # with the keys.
        for shard_tensors in zip(*values):
            yield dict(zip(keys, shard_tensors))

        # Assumed backprop'd
        variables = ((state[k], v.grad.data) for k, v in non_none.items()
                     if isinstance(v, Variable) and v.grad is not None)
        inputs, grads = zip(*variables)
        torch.autograd.backward(inputs, grads)
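A tiny illustration of the unzip/re-zip step described in the comments above (the keys and tensor sizes are arbitrary):

non_none = {'output': torch.randn(10, 4), 'target': torch.randn(10)}
keys, values = zip(*((k, torch.split(v, 4)) for k, v in non_none.items()))
for shard_tensors in zip(*values):
    shard = dict(zip(keys, shard_tensors))   # e.g. {'output': (4, 4), 'target': (4,)}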