15 | 15 | X_train = mnist.train.images
16 | 16 | Y_train = mnist.train.labels
17 | 17 |
18 |    | -# X_train = X_train[:100, :]
19 |    | -# Y_train = Y_train[:100, :]
20 |    | -
21 | 18 | # (5000, 784)
22 | 19 | X_validate = mnist.validation.images
23 | 20 | Y_validate = mnist.validation.labels
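For context, mnist is created earlier in the file (outside this hunk), presumably with the standard TF 1.x dataset helper; a minimal sketch, assuming one-hot labels:

from tensorflow.examples.tutorials.mnist import input_data

# loads MNIST and exposes the .train / .validation / .test splits used below
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)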
28 | 25 |
29 | 26 | # details about the network
30 | 27 | input_layer = 784 # 28 * 28 images flattened
31 |    | -hidden_layer = 256
   | 28 | +hidden_layer_1 = 256
   | 29 | +hidden_layer_2 = 256
32 | 30 | output_layer = 10
   | 31 | +
33 | 32 | print '\nNetwork details...'
34 | 33 | print 'Input size: ', input_layer
35 |    | -print 'Hidden layer units: ', hidden_layer
   | 34 | +print 'Hidden layer 1 units: ', hidden_layer_1
   | 35 | +print 'Hidden layer 2 units: ', hidden_layer_2
36 | 36 | print 'Output layer units: ', output_layer
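For a sense of scale, the new 784-256-256-10 layout comes to roughly 269k trainable parameters (weights plus biases per layer); a quick check in the same Python 2 style as the script:

# parameter count: (784*256 + 256) + (256*256 + 256) + (256*10 + 10)
n_params = (784 * 256 + 256) + (256 * 256 + 256) + (256 * 10 + 10)
print n_params  # 269322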
37 | 37 |
38 | 38 | # graph input
39 | 39 | x = tf.placeholder(tf.float32, [None, input_layer])
40 | 40 | y = tf.placeholder(tf.float32, [None, output_layer])
41 | 41 |
42 | 42 | # model weights
43 |    | -print '\nInitialising random weights and biases'
44 |    | -w_hidden_vals = tf.random_normal([input_layer, hidden_layer])
45 |    | -b_hidden_vals = tf.random_normal([hidden_layer])
46 |    | -w_hidden = tf.Variable(w_hidden_vals, name='hidden_weights')
47 |    | -b_hidden = tf.Variable(b_hidden_vals, name='hidden_bias')
   | 43 | +print '\nInitialising random weights and biases...'
   | 44 | +w_hidden1_vals = tf.random_normal([input_layer, hidden_layer_1])
   | 45 | +b_hidden1_vals = tf.random_normal([hidden_layer_1])
   | 46 | +w_hidden1 = tf.Variable(w_hidden1_vals, name='hidden1_weights')
   | 47 | +b_hidden1 = tf.Variable(b_hidden1_vals, name='hidden1_bias')
   | 48 | +
   | 49 | +w_hidden2_vals = tf.random_normal([hidden_layer_1, hidden_layer_2])
   | 50 | +b_hidden2_vals = tf.random_normal([hidden_layer_2])
   | 51 | +w_hidden2 = tf.Variable(w_hidden2_vals, name='hidden2_weights')
   | 52 | +b_hidden2 = tf.Variable(b_hidden2_vals, name='hidden2_bias')
48 | 53 |
49 |    | -w_output_vals = tf.random_normal([hidden_layer, output_layer])
   | 54 | +
   | 55 | +w_output_vals = tf.random_normal([hidden_layer_2, output_layer])
50 | 56 | b_output_vals = tf.random_normal([output_layer])
51 | 57 | w_output = tf.Variable(w_output_vals, name='output_weights')
52 | 58 | b_output = tf.Variable(b_output_vals, name='output_bias')
53 | 59 |
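One note on the initialisation: tf.random_normal draws from a standard normal (stddev=1.0 by default), so with 784 inputs the first layer's pre-activations start out fairly large. A smaller scale is a common alternative (not what this commit does), e.g.:

# alternative: shrink the initial weight scale via the stddev argument
w_hidden1_vals = tf.random_normal([input_layer, hidden_layer_1], stddev=0.1)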
54 |    | -# model for a multi-layer-perceptron with single hidden layer
55 |    | -# ReLU activation for the hidden layer
56 |    | -hidden_activations = tf.nn.relu(tf.add(tf.matmul(x, w_hidden), b_hidden))
57 |    | -# softmax activation for the output layer
58 |    | -output_activations = tf.add(tf.matmul(hidden_activations, w_output), b_output)
   | 60 | +# model for a multi-layer-perceptron with two hidden layers
   | 61 | +# ReLU activations for the first hidden layer
   | 62 | +hidden1_activations = tf.nn.relu(tf.add(tf.matmul(x, w_hidden1), b_hidden1))
   | 63 | +
   | 64 | +# ReLU activations for the second hidden layer
   | 65 | +hidden2_activations = tf.nn.relu(tf.add(tf.matmul(hidden1_activations, w_hidden2), b_hidden2))
   | 66 | +
   | 67 | +# linear activations for the output layer
   | 68 | +output_activations = tf.add(tf.matmul(hidden2_activations, w_output), b_output)
59 | 69 |
60 | 70 | # using negative log-likelihood as the cost function
61 | 71 | # cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(output_activations), reduction_indices=1))
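The commented-out line above is the raw negative log-likelihood form; the cost actually in use sits in lines collapsed from this hunk. Since output_activations are left as linear logits, a plausible sketch of that elided definition (an assumption, not a quote of the commit) is:

# softmax + cross-entropy in one numerically stable op; expects unscaled logits
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=output_activations, labels=y))

# the optimiser and its learning rate are likewise assumptions about the elided code
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)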
71 | 81 | # other variables
72 | 82 | batch_size = 100
73 | 83 | cost_vec = []
74 |    | -training_epochs = 25
   | 84 | +training_epochs = 15
75 | 85 |
76 | 86 | # launch the graph
77 | 87 | print '\nLaunching the graph...'
78 | 88 | with tf.Session() as sess:
79 | 89 |     sess.run(init_op)
80 | 90 |
81 | 91 |     total_batches = int(mnist.train.num_examples / batch_size)
   | 92 | +    print 'Implementing batchwise stochastic gradient descent...'
   | 93 | +    print 'batch size: ', batch_size
   | 94 | +    print 'Total number of batches: ', total_batches
82 | 95 |
83 | 96 |     for epoch in xrange(training_epochs):
84 | 97 |         avg_cost = 0
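The rest of this epoch loop is collapsed in the hunk above. A minimal sketch of what a batchwise SGD pass over mnist.train typically looks like with these variables, assuming the optimizer and cost ops sketched in the earlier note:

# per-epoch body: run one optimisation step per mini-batch and track the mean cost
for i in xrange(total_batches):
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    _, batch_cost = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
    avg_cost += batch_cost / total_batches
cost_vec.append(avg_cost)
print 'Epoch: ', epoch + 1, ' average cost: ', avg_cost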
121 | 134 |     print 'Test accuracy: ', sess.run(accuracy, feed_dict={x: X_test, y: Y_test}) * 100
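The accuracy op (and the X_test / Y_test arrays) are defined in collapsed lines as well; the usual construction for one-hot labels, offered only as a sketch of what it presumably looks like:

# fraction of samples whose arg-max prediction matches the arg-max label
correct_prediction = tf.equal(tf.argmax(output_activations, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))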
122 | 135 |
123 | 136 |     print '\nSaving the parameters...'
124 |     | -    np.savez('./w_hidden', w_hidden=sess.run(w_hidden))
125 |     | -    np.savez('./b_hidden', b_hidden=sess.run(b_hidden))
126 |     | -    np.savez('./w_output', w_output=sess.run(w_output))
127 |     | -    np.savez('./b_output', b_output=sess.run(b_output))
    | 137 | +    np.savez('./Params/w_hidden1', w_hidden1=sess.run(w_hidden1))
    | 138 | +    np.savez('./Params/b_hidden1', b_hidden1=sess.run(b_hidden1))
    | 139 | +    np.savez('./Params/w_hidden2', w_hidden2=sess.run(w_hidden2))
    | 140 | +    np.savez('./Params/b_hidden2', b_hidden2=sess.run(b_hidden2))
    | 141 | +    np.savez('./Params/w_output', w_output=sess.run(w_output))
    | 142 | +    np.savez('./Params/b_output', b_output=sess.run(b_output))
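Two small notes on the new save paths: np.savez appends a .npz extension, and it does not create the ./Params directory, so that folder needs to exist before this runs. Loading a saved array back looks like:

# reads ./Params/w_hidden1.npz and pulls out the array stored under the 'w_hidden1' key
w1 = np.load('./Params/w_hidden1.npz')['w_hidden1']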
128 | 143 |
129 | 144 |     print '\nTotal time taken: ', time.time() - init_time