import numpy as np
import pylab

# Random number generator used for weight initialization (seed chosen arbitrarily)
rng = np.random.RandomState(0)


def basic_sgd_demo(train_X, train_Y, val_X, val_Y, test_X, test_Y, config):
    # Demonstrates the process of learning a simple one-hidden-layer NN
    # Input kernel: linear
    # Number of hidden layers: 1
    # Learning method: SGD
    # Parse parameters from config
    lr = config['lr']
    num_epoch = config['num_epoch']
    num_train_per_class = config['num_train_per_class']
    num_hidden_node = config['num_hidden_node']
    display_rate = config['display_rate']
    activation_function_type = config['activation_function']
    num_train_sample = train_X.shape[0]
    num_feature = train_X.shape[1]
    num_class = train_Y.shape[1]
    # Hidden-layer weights: shape (num_feature, num_hidden_node)
    W1 = rng.randn(num_feature, num_hidden_node)
    b1 = rng.randn(1, num_hidden_node)
    # Output-layer weights: shape (num_hidden_node, num_class)
    W2 = rng.randn(num_hidden_node, num_class)
    b2 = rng.randn(1, num_class)
    # Interactive plotting so train_draw can refresh the figure during training
    pylab.ion()
    pylab.show()
    all_cost = []
    for i in range(num_epoch):
        # Forward pass: hidden pre-activation, hidden activation, output scores
        a1 = np.dot(train_X, W1) + b1
        z1 = activation_function(a1, activation_function_type)
        a2 = np.dot(z1, W2) + b2
        # Softmax log loss (cross-entropy) on the output scores
        J = softmax_log_loss(a2, train_Y)
        print('[Epoch %d] Train loss: %f' % (i, J))
        # Backprop: gradients of the loss w.r.t. every weight and bias
        dJ_dW1, dJ_db1, dJ_dW2, dJ_db2 = get_grad(train_X, train_Y, W1, b1, W2, b2, config)
        # NumericalGradientCheck(train_X, train_Y, W1, b1, W2, b2, dJ_db1)
        # Gradient-descent parameter update
        W1 = W1 - lr * dJ_dW1
        b1 = b1 - lr * dJ_db1
        W2 = W2 - lr * dJ_dW2
        b2 = b2 - lr * dJ_db2
        all_cost.append(J)
        if i % display_rate == 0:
            config['train_method'] = 'sgd'
            train_draw(train_X, train_Y, W1, b1, W2, b2, config, all_cost, i, J)
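# --- Helper-function sketches (assumed, not the author's definitions) ---
# basic_sgd_demo relies on activation_function, softmax_log_loss and get_grad,
# which the original post defines elsewhere. The versions below are a minimal
# sketch of what they plausibly compute, matching the call signatures above;
# only a sigmoid activation is handled, and the loss is averaged over samples.
def activation_function(a, kind):
    # Elementwise nonlinearity applied to the hidden pre-activations
    if kind == 'sigmoid':
        return 1.0 / (1.0 + np.exp(-a))
    raise ValueError('unsupported activation in this sketch: %s' % kind)


def softmax_log_loss(scores, Y):
    # Mean softmax cross-entropy; Y is one-hot with shape (N, num_class)
    shifted = scores - scores.max(axis=1, keepdims=True)
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    return -np.mean(np.sum(Y * log_probs, axis=1))


def get_grad(X, Y, W1, b1, W2, b2, config):
    # Backprop through the two-layer network used in basic_sgd_demo
    N = X.shape[0]
    a1 = np.dot(X, W1) + b1
    z1 = activation_function(a1, config['activation_function'])
    a2 = np.dot(z1, W2) + b2
    shifted = a2 - a2.max(axis=1, keepdims=True)
    probs = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
    dJ_da2 = (probs - Y) / N            # gradient of the mean cross-entropy
    dJ_dW2 = np.dot(z1.T, dJ_da2)
    dJ_db2 = dJ_da2.sum(axis=0, keepdims=True)
    dJ_dz1 = np.dot(dJ_da2, W2.T)
    dJ_da1 = dJ_dz1 * z1 * (1.0 - z1)   # sigmoid derivative
    dJ_dW1 = np.dot(X.T, dJ_da1)
    dJ_db1 = dJ_da1.sum(axis=0, keepdims=True)
    return dJ_dW1, dJ_db1, dJ_dW2, dJ_db2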
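# --- Usage sketch (illustrative, not from the original post) ---
# Example of how basic_sgd_demo might be called on toy data, assuming
# train_draw (and the other helpers) from the post are available. The data,
# config values and the 'sigmoid' activation key below are assumptions.
if __name__ == '__main__':
    num_train_per_class = 50
    # Two 2-D Gaussian blobs with one-hot labels
    X_pos = rng.randn(num_train_per_class, 2) + np.array([2.0, 2.0])
    X_neg = rng.randn(num_train_per_class, 2) + np.array([-2.0, -2.0])
    train_X = np.vstack([X_pos, X_neg])
    train_Y = np.zeros((2 * num_train_per_class, 2))
    train_Y[:num_train_per_class, 0] = 1.0
    train_Y[num_train_per_class:, 1] = 1.0

    config = {
        'lr': 0.05,
        'num_epoch': 500,
        'num_train_per_class': num_train_per_class,
        'num_hidden_node': 10,
        'display_rate': 50,
        'activation_function': 'sigmoid',
    }
    # Validation/test splits reuse the training data purely for illustration
    basic_sgd_demo(train_X, train_Y, train_X, train_Y, train_X, train_Y, config)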