import numpy as np
import theano
from theano import tensor as T
from theano import function, shared
import pandas as pd
import matplotlib.pyplot as plt
import time

#random number generator
rng = np.random

#number of training examples
M = 5000
#number of features
N = 784
#number of output classes
out_class = 10
#learning rate
alpha = 0.1
training_steps = 1000

#read the data (assumed: a Kaggle-style train.csv with a 'label' column
#followed by 784 pixel columns)
train = pd.read_csv('train.csv')
X_train = train.drop('label', axis=1)
X_train = X_train[0:5000]
Y_train = train['label']
Y_train = Y_train[0:5000]

#convert the values to numpy arrays
X_train = X_train.as_matrix()
X_train = X_train.astype(theano.config.floatX)
Y_train = Y_train.astype(int)

#feature-scale the inputs so that every pixel value lies in [0, 1]
max_pixel_val = 255
min_pixel_val = 0
X_train = X_train / (max_pixel_val - min_pixel_val)

#declare the symbolic variables
x = T.dmatrix('x')
y = T.dvector('y')

#declare the weights and bias terms for all the classes
W = np.zeros((N, out_class))
B = np.zeros((1, out_class))
w = shared(value=rng.randn(N), name='w')
b = shared(value=rng.randn(), name='b')

#hypothesis function: sigmoid of the linear score
h = 1.0 / (1.0 + T.exp(-T.dot(x, w) - b))
#predict 1 when the hypothesis is greater than 0.5
pred = h > 0.5

# H = 1.0 / (1.0 + T.exp(-T.dot(x, W) - B))
# pred_vec = T.argmax(H, axis=1)

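#note: Theano also provides a built-in, numerically more stable sigmoid;
#the hypothesis above could equivalently be written as
#   h = T.nnet.sigmoid(T.dot(x, w) + b)
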
#cost function: mean cross-entropy plus an L2 regularization term on w
J = -y * T.log(h) - (1 - y) * T.log(1 - h)
reg = 0.01 * (w ** 2).sum()
cost = J.mean() + reg

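#for reference, T.grad below recovers the analytic gradients of this cost:
#   dcost/dw = T.dot(x.T, h - y) / M + 2 * 0.01 * w
#   dcost/db = T.mean(h - y)
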
#calculate the gradients of the cost with respect to the parameters
grad_w, grad_b = T.grad(cost, [w, b])

#compiled function that performs one gradient-descent step per call
train = function(inputs=[x, y], outputs=[pred, cost],
                 updates=[(w, w - alpha * grad_w), (b, b - alpha * grad_b)])
# predict = function(inputs=[x], outputs=[H, pred_vec])

#per-class predict function (handy when debugging a single classifier)
predict_class = function(inputs=[x], outputs=[h, pred])

"""
TRAIN THE MODEL
"""
#train the model separately for each class (one-vs-all)
for i in xrange(out_class):

    #create an M-dimensional target vector of 0s and 1s for this class
    Y_vec_train = np.zeros((M, ))
    Y_dummy = (Y_train == i)
    for k in xrange(M):
        if Y_dummy[k]:
            Y_vec_train[k] = 1
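
    #the loop above collapses to one vectorised line, assuming Y_train
    #behaves like a numpy array of ints:
    #   Y_vec_train = (Y_train == i).astype(theano.config.floatX)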

    # if i == 0:
    #     print 'random b: ', b.get_value()
    #     print 'random w: ', w.get_value()

    #re-initialise the weights and bias term with random values for this class
    w.set_value(rng.randn(N))
    b.set_value(rng.randn())

    print '\nTraining the parameters for class ', i
    # print 'Iterating over entire training set...\n'
    #train the parameters for this particular class
    cost_vec = np.zeros((training_steps, ))
    for j in xrange(training_steps):
        pred, cost = train(X_train, Y_vec_train)
        # print 'updated b: ', b.get_value()
        # print 'updated w: ', w.get_value()[0:10]
        # time.sleep(0.5)
        print '\n'
        print 'Iter: ', j + 1, '\t J: ', cost, \
            '\t b: ', b.get_value(), '\t w: ', w.get_value()[0:3]
        cost_vec[j] = cost

    #predictions made by the model at the end of training for class i
    # print '\n\nPredicting the output after the training for class ', i
    pred_count = 0
    for predicted, target in zip(pred, Y_vec_train):
        if predicted == target:
            pred_count += 1
        # print 'predicted: ', predicted, ' Target: ', target
    print 'Accuracy in classification for class ', i, ' is: ', (pred_count / float(M)) * 100

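    #equivalent vectorised form, assuming pred is a numpy boolean array:
    #   print 'Accuracy: ', np.mean(pred == Y_vec_train) * 100
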
    #plot the cost as a function of the training iteration
    x_vals = [idx for idx in range(training_steps)]
    y_vals = cost_vec
    plt.plot(x_vals, y_vals, 'r')
    plt.savefig("./cost_for_class_{i}.png".format(i=i))
    plt.show()
    plt.cla()
    plt.clf()

    # print 'Done with training for class ', i
    # print 'weights for class: ', i, w.get_value()
    # print 'bias term for class: ', i, b.get_value()

    #evaluate the trained hypothesis for this class over the training set
    hypo, pr = predict_class(X_train)
    # if i == 0:
    #     print 'Xtrain[0, :] : \n', [idx2 for idx2 in X_train[0, :]]
    #     print hypo
    #     print pr
    #     print '\ntrained b: ', b.get_value()
    #     print 'trained w: ', w.get_value()

    #store the weights and bias term for that particular class in column i
    W[:, i] = w.get_value()
    B[0, i] = b.get_value()

    time.sleep(2)

print 'Done with training the model!'

#save the parameters for later use
np.savez('./weights.npz', w=W)
np.savez('./bias.npz', b=B)
print 'Saved the parameters...'
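
#the saved parameters can be restored later with, e.g.:
#   W = np.load('./weights.npz')['w']
#   B = np.load('./bias.npz')['b']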


"""
PREDICT THE ACCURACY OVER THE TRAINING SET
"""
H = 1.0 / (1.0 + T.exp(-T.dot(x, W) - B))
pred_vec = T.argmax(H, axis=1)
predict = function(inputs=[x], outputs=[H, pred_vec])


print 'Predicting accuracy over the training set: '
Hypo, predicted = predict(X_train)
print 'hypo shape: ', Hypo.shape
print 'predicted shape: ', predicted.shape
print '\n\nHypo: ', Hypo[0:20, :]
print '\n\npredicted: ', predicted[0:20]
print '\n\ntarget: ', Y_train[0:20]

pred_count = 0
for target, _pred in zip(Y_train, predicted):
    # print 'predicted: ', _pred, ' Target: ', target
    if target == _pred:
        pred_count += 1
print 'Accuracy over the entire training set is: ', (pred_count / float(M)) * 100
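
#equivalent vectorised accuracy, assuming Y_train is a numpy int array:
#   print 'Accuracy: ', np.mean(np.asarray(Y_train) == predicted) * 100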