import numpy as np
import pylab

# Random number generator used for weight initialization (seed chosen arbitrarily)
rng = np.random.RandomState(0)


def basic_sgd_demo(train_X, train_Y, val_X, val_Y, test_X, test_Y, config):
    # Demonstrates the process of learning a simple one-hidden-layer NN
    # Input kernel: linear
    # Number of hidden layers: 1
    # Learning method: SGD
    # Parse parameters from config
    lr = config['lr']
    num_epoch = config['num_epoch']
    num_train_per_class = config['num_train_per_class']
    num_hidden_node = config['num_hidden_node']
    display_rate = config['display_rate']
    activation_function_type = config['activation_function']
    num_train_sample = train_X.shape[0]
    num_feature = train_X.shape[1]
    num_class = train_Y.shape[1]
    # Hidden-layer weights: shape (num_feature, num_hidden_node)
    W1 = rng.randn(num_feature, num_hidden_node)
    b1 = rng.randn(1, num_hidden_node)
    # Output-layer weights: shape (num_hidden_node, num_class)
    W2 = rng.randn(num_hidden_node, num_class)
    b2 = rng.randn(1, num_class)
    # Interactive plotting so train_draw can refresh the figure during training
    pylab.ion()
    pylab.show()
    all_cost = []
    for i in range(num_epoch):
        # Forward pass: hidden pre-activation, hidden activation, output scores
        a1 = np.dot(train_X, W1) + b1
        z1 = activation_function(a1, activation_function_type)
        a2 = np.dot(z1, W2) + b2
        # Softmax log loss (cross-entropy) on the output scores
        J = softmax_log_loss(a2, train_Y)
        print('[Epoch %d] Train loss: %f' % (i, J))
        # Backprop: gradients of the loss w.r.t. every weight and bias
        dJ_dW1, dJ_db1, dJ_dW2, dJ_db2 = get_grad(train_X, train_Y, W1, b1, W2, b2, config)
        # NumericalGradientCheck(train_X, train_Y, W1, b1, W2, b2, dJ_db1)
        # Gradient-descent parameter update
        W1 = W1 - lr * dJ_dW1
        b1 = b1 - lr * dJ_db1
        W2 = W2 - lr * dJ_dW2
        b2 = b2 - lr * dJ_db2
        all_cost.append(J)
        if i % display_rate == 0:
            config['train_method'] = 'sgd'
            train_draw(train_X, train_Y, W1, b1, W2, b2, config, all_cost, i, J)
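# --- Helper-function sketches (assumed, not the author's definitions) ---
# basic_sgd_demo relies on activation_function, softmax_log_loss and get_grad,
# which the original post defines elsewhere. The versions below are a minimal
# sketch of what they plausibly compute, matching the call signatures above;
# only a sigmoid activation is handled, and the loss is averaged over samples.
def activation_function(a, kind):
    # Elementwise nonlinearity applied to the hidden pre-activations
    if kind == 'sigmoid':
        return 1.0 / (1.0 + np.exp(-a))
    raise ValueError('unsupported activation in this sketch: %s' % kind)


def softmax_log_loss(scores, Y):
    # Mean softmax cross-entropy; Y is one-hot with shape (N, num_class)
    shifted = scores - scores.max(axis=1, keepdims=True)
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    return -np.mean(np.sum(Y * log_probs, axis=1))


def get_grad(X, Y, W1, b1, W2, b2, config):
    # Backprop through the two-layer network used in basic_sgd_demo
    N = X.shape[0]
    a1 = np.dot(X, W1) + b1
    z1 = activation_function(a1, config['activation_function'])
    a2 = np.dot(z1, W2) + b2
    shifted = a2 - a2.max(axis=1, keepdims=True)
    probs = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
    dJ_da2 = (probs - Y) / N            # gradient of the mean cross-entropy
    dJ_dW2 = np.dot(z1.T, dJ_da2)
    dJ_db2 = dJ_da2.sum(axis=0, keepdims=True)
    dJ_dz1 = np.dot(dJ_da2, W2.T)
    dJ_da1 = dJ_dz1 * z1 * (1.0 - z1)   # sigmoid derivative
    dJ_dW1 = np.dot(X.T, dJ_da1)
    dJ_db1 = dJ_da1.sum(axis=0, keepdims=True)
    return dJ_dW1, dJ_db1, dJ_dW2, dJ_db2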
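# --- Usage sketch (illustrative, not from the original post) ---
# Example of how basic_sgd_demo might be called on toy data, assuming
# train_draw (and the other helpers) from the post are available. The data,
# config values and the 'sigmoid' activation key below are assumptions.
if __name__ == '__main__':
    num_train_per_class = 50
    # Two 2-D Gaussian blobs with one-hot labels
    X_pos = rng.randn(num_train_per_class, 2) + np.array([2.0, 2.0])
    X_neg = rng.randn(num_train_per_class, 2) + np.array([-2.0, -2.0])
    train_X = np.vstack([X_pos, X_neg])
    train_Y = np.zeros((2 * num_train_per_class, 2))
    train_Y[:num_train_per_class, 0] = 1.0
    train_Y[num_train_per_class:, 1] = 1.0

    config = {
        'lr': 0.05,
        'num_epoch': 500,
        'num_train_per_class': num_train_per_class,
        'num_hidden_node': 10,
        'display_rate': 50,
        'activation_function': 'sigmoid',
    }
    # Validation/test splits reuse the training data purely for illustration
    basic_sgd_demo(train_X, train_Y, train_X, train_Y, train_X, train_Y, config)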