seqdata.py 文件源码

python
阅读 19 收藏 0 点赞 0 评论 0

项目:sk-torch 作者: mattHawthorn 项目源码 文件源码
def from_vocab(cls, sequences: Map[int, Seq[H]], vocab: Vocabulary, max_len: int, pack_sequences: bool=False,
                   append_eos: bool=True, eos_token: Opt[H]=DEFAULT_EOS, null_token: H=DEFAULT_NULL,
                   int_id_type: str='long', shuffle: bool=True):
        """
        :param vocab: instance of Vocabulary to use for encoding/decoding tokens
        :param max_len: maximum length of sequences to sample
        :param pack_sequences: bool indicating whether to return regular Tensors or PackedSequence instances.
        :param int_id_type: string indicating the type of int ids to use. Must be a key of data.str_to_int_tensor_type.
        :param eos_token: string or hashable to append to mark end-of-sequence in encoding
        :param null_token: Optional hashable to use for padding sequences. Added to the vocab, unless none is passed
            and none is built, in which case this is considered to be an int id.
            Numpy aliases for integer types are valid, as well as 'long', 'short', 'byte', 'char'.
            The default 'long' is recommended, as only LongTensors can be used to index Embeddings in pytorch.
        """
        encoder = SequenceTensorEncoder(vocab, append_eos=append_eos, eos_token=eos_token, null_token=null_token,
                                        int_id_type=int_id_type)
        return cls(sequences=sequences, encoder=encoder, max_len=max_len, pack_sequences=pack_sequences,
                   null_token=null_token, shuffle=shuffle)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号