# All snippets below assume `import mxnet as mx`; other names such as `cfg`,
# `C`, `pkl`, `reldiff`, `MAX_LEN` or the LSTM helpers are module-level
# definitions in the projects these examples were taken from.
import mxnet as mx


def bn(data, name, eps=1.001e-5, fix_gamma=False, use_global_stats=None):
    if use_global_stats is None:
        use_global_stats = cfg.get('bn_use_global_stats', False)
    if fix_gamma:
        with mx.AttrScope(lr_mult='0.', wd_mult='0.'):
            gamma = mx.sym.Variable('{}_gamma'.format(name))
            beta = mx.sym.Variable('{}_beta'.format(name))
        return mx.sym.BatchNorm(data=data, gamma=gamma, beta=beta, name=name,
                                eps=eps,
                                fix_gamma=True,
                                use_global_stats=use_global_stats)
    else:
        lr_type = cfg.get('lr_type', 'torch')
        with _attr_scope_lr(lr_type, 'weight'):
            gamma = mx.sym.Variable('{}_gamma'.format(name))
        with _attr_scope_lr(lr_type, 'bias'):
            beta = mx.sym.Variable('{}_beta'.format(name))
        return mx.sym.BatchNorm(data=data, gamma=gamma, beta=beta, name=name,
                                eps=eps,
                                fix_gamma=False,
                                use_global_stats=use_global_stats)
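
# A hypothetical usage sketch for bn() above. It assumes `cfg` is a plain dict
# of configuration flags, as suggested by the cfg.get() calls; the variable and
# layer names here are illustrative, not from the source project.
import mxnet as mx

cfg = {'bn_use_global_stats': True, 'lr_type': 'torch'}
data = mx.sym.Variable('data')
net = bn(data, name='bn_data', fix_gamma=True)  # gamma/beta frozen via lr_mult/wd_mult = '0.'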

def encode(self,
           data: mx.sym.Symbol,
           data_length: Optional[mx.sym.Symbol],
           seq_len: int) -> Tuple[mx.sym.Symbol, mx.sym.Symbol, int]:
    """
    Encodes data given sequence lengths of individual examples and maximum sequence length.

    :param data: Input data.
    :param data_length: Vector with sequence lengths.
    :param seq_len: Maximum sequence length.
    :return: Encoded versions of input data (data, data_length, seq_len).
    """
    with mx.AttrScope(__layout__=C.TIME_MAJOR):
        return mx.sym.swapaxes(data=data, dim1=0, dim2=1), data_length, seq_len
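
# The AttrScope above just attaches a `__layout__` attribute to every symbol
# created inside it. A minimal, self-contained sketch of the same mechanism,
# assuming C.TIME_MAJOR is the layout string 'TNC' as in the surrounding project:
import mxnet as mx

x = mx.sym.Variable('x')
with mx.AttrScope(__layout__='TNC'):
    y = mx.sym.swapaxes(data=x, dim1=0, dim2=1)
print(y.attr('__layout__'))  # -> 'TNC'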

def test_ctx_group():
    with mx.AttrScope(ctx_group='stage1'):
        data = mx.symbol.Variable('data')
        fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
        act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")
    set_stage1 = set(act1.list_arguments())

    with mx.AttrScope(ctx_group='stage2'):
        fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64)
        act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
        fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
        fc3 = mx.symbol.BatchNorm(fc3)
        mlp = mx.symbol.SoftmaxOutput(data=fc3, name='softmax')
    set_stage2 = set(mlp.list_arguments()) - set_stage1

    group2ctx = {
        'stage1': mx.cpu(1),
        'stage2': mx.cpu(2)
    }
    texec = mlp.simple_bind(mx.cpu(0),
                            group2ctx=group2ctx,
                            data=(1, 200))

    for arr, name in zip(texec.arg_arrays, mlp.list_arguments()):
        if name in set_stage1:
            assert arr.context == group2ctx['stage1']
        else:
            assert arr.context == group2ctx['stage2']

def test_chain():
    n = 2
    data1 = mx.sym.Variable('data1')
    data2 = mx.sym.Variable('data2')
    with mx.AttrScope(ctx_group='dev1'):
        net = data1 + data2
        net = net * 3
    with mx.AttrScope(ctx_group='dev2'):
        net = net + data1

    with mx.Context(mx.cpu(0)):
        shape = (4, 5)
        arr = [mx.nd.empty(shape) for i in range(n)]
        arr_grad = [mx.nd.empty(shape) for i in range(n)]

    exec1 = net.bind(mx.cpu(),
                     args=arr,
                     args_grad=arr_grad,
                     group2ctx={'dev1': mx.cpu(0), 'dev2': mx.cpu(1)})
    arr[0][:] = 1.0
    arr[1][:] = 2.0
    arr2 = [a.copyto(mx.cpu()) for a in arr]
    arr_grad2 = [a.copyto(mx.cpu()) for a in arr_grad]
    exec2 = net.bind(mx.cpu(),
                     args=arr2,
                     args_grad=arr_grad2)

    # Show the execution plan that involves copynode
    print(exec1.debug_str())
    exec1.forward()
    exec2.forward()
    assert reldiff(exec1.outputs[0].asnumpy(), exec2.outputs[0].asnumpy()) < 1e-6

    out_grad = mx.nd.empty(shape, mx.cpu(1))
    out_grad[:] = 1.0
    exec1.backward([out_grad])
    exec2.backward([out_grad.copyto(mx.cpu())])
    for a, b in zip(arr_grad, arr_grad2):
        assert reldiff(a.asnumpy(), b.asnumpy()) < 1e-6

def test_attr_basic():
    with mx.AttrScope(group='4', data='great'):
        data = mx.symbol.Variable('data',
                                  attr={'dtype': 'data',
                                        'group': '1'})
        gdata = mx.symbol.Variable('data2')
    assert gdata.attr('group') == '4'
    assert data.attr('group') == '1'

    data2 = pkl.loads(pkl.dumps(data))
    assert data.attr('dtype') == data2.attr('dtype')

def test_operator():
    data = mx.symbol.Variable('data')
    with mx.AttrScope(group='4', data='great'):
        fc1 = mx.symbol.Activation(data, act_type='relu')
        with mx.AttrScope(init_bias='0.0'):
            fc2 = mx.symbol.FullyConnected(fc1, num_hidden=10, name='fc2')
    assert fc1.attr('data') == 'great'
    assert fc2.attr('data') == 'great'
    assert fc2.attr('init_bias') == '0.0'

    fc2copy = pkl.loads(pkl.dumps(fc2))
    assert fc2copy.tojson() == fc2.tojson()
    fc2weight = fc2.get_internals()['fc2_weight']

def _attr_scope_lr(lr_type, lr_owner):
    assert lr_type in ('alex', 'alex10', 'fixed', 'torch')
    # weight (lr_mult, wd_mult); bias (lr_mult, wd_mult)
    # alex: 1, 1; 2, 0
    if lr_type == 'alex':
        if lr_owner == 'weight':
            return mx.AttrScope()
        elif lr_owner == 'bias':
            return mx.AttrScope(lr_mult='2.', wd_mult='0.')
        else:
            assert False
    # alex10: 10, 1; 20, 0
    if lr_type == 'alex10':
        if lr_owner == 'weight':
            return mx.AttrScope(lr_mult='10.', wd_mult='1.')
        elif lr_owner == 'bias':
            return mx.AttrScope(lr_mult='20.', wd_mult='0.')
        else:
            assert False
    # fixed: 0, 0; 0, 0 (applied to both weight and bias)
    if lr_type == 'fixed':
        assert lr_owner in ('weight', 'bias')
        return mx.AttrScope(lr_mult='0.', wd_mult='0.')
    # torch: 1, 1; 1, 1 (the defaults, so do nothing)
    return mx.AttrScope()
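
# A brief sketch of how _attr_scope_lr() pairs with variable creation (assumes
# only `import mxnet as mx`; the variable names are illustrative). With the
# 'alex' schedule, weights keep the default multipliers while biases get
# lr_mult='2.' and wd_mult='0.':
import mxnet as mx

with _attr_scope_lr('alex', 'weight'):
    weight = mx.sym.Variable('conv1_weight')
with _attr_scope_lr('alex', 'bias'):
    bias = mx.sym.Variable('conv1_bias')
print(weight.attr('lr_mult'), bias.attr('lr_mult'), bias.attr('wd_mult'))  # None '2.' '0.'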

def get_dssm():
    doc_pos = mx.sym.Variable('doc_pos')
    doc_neg = mx.sym.Variable('doc_neg')
    data_usr = mx.sym.Variable("data_usr", stype='csr')
    # with mx.AttrScope(ctx_group="cpu"):
    w_usr = mx.sym.Variable('usr_weight', stype='row_sparse', shape=(USR_NUM, OUT_DIM))
    # shared weights
    w1 = mx.sym.Variable('fc1_doc_weight')
    w2 = mx.sym.Variable('fc2_doc_weight')
    w3 = mx.sym.Variable('fc3_doc_weight')
    b1 = mx.sym.Variable('fc1_doc_bias')
    b2 = mx.sym.Variable('fc2_doc_bias')
    b3 = mx.sym.Variable('fc3_doc_bias')

    def cosine(usr, doc):
        dot = usr * doc
        dot = mx.sym.sum_axis(dot, axis=1)
        return dot

    def doc_mlp(data):
        fc1 = mx.sym.FullyConnected(data=data, num_hidden=num_hidden, name='fc1', weight=w1, bias=b1)
        fc1 = mx.sym.Activation(data=fc1, act_type='relu')
        fc2 = mx.sym.FullyConnected(data=fc1, num_hidden=num_hidden, name='fc2', weight=w2, bias=b2)
        fc2 = mx.sym.Activation(data=fc2, act_type='relu')
        fc3 = mx.sym.FullyConnected(data=fc2, num_hidden=OUT_DIM, name='fc3', weight=w3, bias=b3)
        fc3 = mx.sym.Activation(data=fc3, act_type='relu')
        fc3 = mx.sym.L2Normalization(data=fc3)
        return fc3

    # usr net
    # with mx.AttrScope(ctx_group="cpu"):
    usr1 = mx.sym.dot(data_usr, w_usr)
    usr = mx.sym.L2Normalization(data=usr1)
    # doc net
    mlp_pos = doc_mlp(doc_pos)
    mlp_neg = doc_mlp(doc_neg)
    cosine_pos = cosine(usr, mlp_pos)
    cosine_neg = cosine(usr, mlp_neg)
    exp = mx.sym.exp(data=(cosine_neg - cosine_pos))
    pred = mx.sym.log1p(data=exp)
    out = mx.sym.MAERegressionOutput(data=pred, name='mae')
    return out
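
# A small sanity check of the pairwise ranking loss used above,
# log(1 + exp(cos_neg - cos_pos)), on plain NDArrays. Nothing here depends on
# the DSSM symbol; the cosine values are made up for illustration.
import mxnet as mx

cos_pos = mx.nd.array([0.9, 0.2])
cos_neg = mx.nd.array([0.1, 0.8])
loss = mx.nd.log1p(mx.nd.exp(cos_neg - cos_pos))
print(loss.asnumpy())  # the pair where cos_pos exceeds cos_neg gets the smaller loss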

def lstm_unroll(num_lstm_layer,
                num_hidden, dropout=0.,
                concat_decode=True, use_loss=False):
    """unrolled lstm network"""
    # LSTMParam, LSTMState, lstm, MAX_LEN and n_classes are module-level
    # definitions in the original project.
    with mx.AttrScope(ctx_group='decode'):
        cls_weight = mx.sym.Variable("cls_weight")
        cls_bias = mx.sym.Variable("cls_bias")

    param_cells = []
    last_states = []
    for i in range(num_lstm_layer):
        with mx.AttrScope(ctx_group='layer%d' % i):
            param_cells.append(LSTMParam(i2h_weight=mx.sym.Variable("l%d_i2h_weight" % i),
                                         i2h_bias=mx.sym.Variable("l%d_i2h_bias" % i),
                                         h2h_weight=mx.sym.Variable("l%d_h2h_weight" % i),
                                         h2h_bias=mx.sym.Variable("l%d_h2h_bias" % i)))
            state = LSTMState(c=mx.sym.Variable("l%d_init_c" % i),
                              h=mx.sym.Variable("l%d_init_h" % i))
        last_states.append(state)

    # stack LSTM
    hidden = mx.sym.SliceChannel(data=mx.sym.Variable("data"), num_outputs=MAX_LEN, squeeze_axis=0)
    for i in range(num_lstm_layer):
        next_hidden = []
        for t in range(MAX_LEN):
            with mx.AttrScope(ctx_group='layer%d' % i):
                next_state = lstm(num_hidden, indata=hidden[t],
                                  prev_state=last_states[i],
                                  param=param_cells[i],
                                  layeridx=i, dropout=0.)
            next_hidden.append(next_state.h)
            last_states[i] = next_state
        hidden = next_hidden[:]

    sm = []
    labels = mx.sym.SliceChannel(data=mx.sym.Variable("labels"), num_outputs=MAX_LEN, squeeze_axis=0)
    for t in range(MAX_LEN):
        fc = mx.sym.FullyConnected(data=hidden[t],
                                   weight=cls_weight,
                                   bias=cls_bias,
                                   num_hidden=n_classes)
        sm.append(mx.sym.softmax_cross_entropy(fc, labels[t], name="sm"))

    # expose the final states so they can be fed back in for the next batch
    for i in range(num_lstm_layer):
        state = last_states[i]
        state = LSTMState(c=mx.sym.BlockGrad(state.c, name="l%d_last_c" % i),
                          h=mx.sym.BlockGrad(state.h, name="l%d_last_h" % i))
        last_states[i] = state

    unpack_c = [state.c for state in last_states]
    unpack_h = [state.h for state in last_states]
    list_all = sm + unpack_c + unpack_h
    return mx.sym.Group(list_all)
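
# Sketch of the device mapping such a grouped symbol is typically bound with;
# the contexts and layer count are illustrative (assumes `import mxnet as mx`).
# The resulting dict would be passed as group2ctx to bind/simple_bind, as in
# test_ctx_group above.
import mxnet as mx

num_lstm_layer = 2
group2ctx = {'decode': mx.cpu(0)}
group2ctx.update({'layer%d' % i: mx.cpu(i) for i in range(num_lstm_layer)})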