import numpy as np
import theano
from theano import tensor as T
from theano import function, shared
import pandas as pd
import matplotlib.pyplot as plt
import time

#random number generator
rng = np.random

#number of training examples
M = 5000
#number of features
N = 784
#number of output classes
out_class = 10
#learning rate
alpha = 0.1
training_steps = 1000

#read the data (assumed: a Kaggle-style train.csv with a 'label' column
#followed by 784 pixel columns)
train = pd.read_csv('train.csv')
X_train = train.drop('label', axis=1)
X_train = X_train[0:5000]
Y_train = train['label']
Y_train = Y_train[0:5000]

#convert the values to numpy arrays
X_train = X_train.as_matrix()
X_train = X_train.astype(theano.config.floatX)
Y_train = Y_train.astype(int)

#feature-scale the inputs so that every pixel value lies in [0, 1]
max_pixel_val = 255
min_pixel_val = 0
X_train = X_train / (max_pixel_val - min_pixel_val)

#declare the symbolic variables
x = T.dmatrix('x')
y = T.dvector('y')

#declare the weights and bias terms for all the classes
W = np.zeros((N, out_class))
B = np.zeros((1, out_class))
w = shared(value=rng.randn(N), name='w')
b = shared(value=rng.randn(), name='b')

#hypothesis function: sigmoid of the linear score
h = 1.0 / (1.0 + T.exp(-T.dot(x, w) - b))
#predict 1 when the hypothesis is greater than 0.5
pred = h > 0.5

# H = 1.0 / (1.0 + T.exp(-T.dot(x, W) - B))
# pred_vec = T.argmax(H, axis=1)

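#note: Theano also provides a built-in, numerically more stable sigmoid;
#the hypothesis above could equivalently be written as
#   h = T.nnet.sigmoid(T.dot(x, w) + b)
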
#cost function: mean cross-entropy plus an L2 regularization term on w
J = -y * T.log(h) - (1 - y) * T.log(1 - h)
reg = 0.01 * (w ** 2).sum()
cost = J.mean() + reg

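#for reference, T.grad below recovers the analytic gradients of this cost:
#   dcost/dw = T.dot(x.T, h - y) / M + 2 * 0.01 * w
#   dcost/db = T.mean(h - y)
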
#calculate the gradients of the cost with respect to the parameters
grad_w, grad_b = T.grad(cost, [w, b])

#compiled function that performs one gradient-descent step per call
train = function(inputs=[x, y], outputs=[pred, cost],
                 updates=[(w, w - alpha * grad_w), (b, b - alpha * grad_b)])
# predict = function(inputs=[x], outputs=[H, pred_vec])

#per-class predict function (handy when debugging a single classifier)
predict_class = function(inputs=[x], outputs=[h, pred])

"""
TRAIN THE MODEL
"""
#train the model separately for each class (one-vs-all)
for i in xrange(out_class):

    #create an M-dimensional target vector of 0s and 1s for this class
    Y_vec_train = np.zeros((M, ))
    Y_dummy = (Y_train == i)
    for k in xrange(M):
        if Y_dummy[k]:
            Y_vec_train[k] = 1
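
    #the loop above collapses to one vectorised line, assuming Y_train
    #behaves like a numpy array of ints:
    #   Y_vec_train = (Y_train == i).astype(theano.config.floatX)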

    # if i == 0:
    #     print 'random b: ', b.get_value()
    #     print 'random w: ', w.get_value()

    #re-initialise the weights and bias term with random values for this class
    w.set_value(rng.randn(N))
    b.set_value(rng.randn())

    print '\nTraining the parameters for class ', i
    # print 'Iterating over entire training set...\n'
    #train the parameters for this particular class
    cost_vec = np.zeros((training_steps, ))
    for j in xrange(training_steps):
        pred, cost = train(X_train, Y_vec_train)
        # print 'updated b: ', b.get_value()
        # print 'updated w: ', w.get_value()[0:10]
        # time.sleep(0.5)
        print '\n'
        print 'Iter: ', j + 1, '\t J: ', cost, \
            '\t b: ', b.get_value(), '\t w: ', w.get_value()[0:3]
        cost_vec[j] = cost

    #predictions made by the model at the end of training for class i
    # print '\n\nPredicting the output after the training for class ', i
    pred_count = 0
    for predicted, target in zip(pred, Y_vec_train):
        if predicted == target:
            pred_count += 1
        # print 'predicted: ', predicted, ' Target: ', target
    print 'Accuracy in classification for class ', i, ' is: ', (pred_count / float(M)) * 100

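    #equivalent vectorised form, assuming pred is a numpy boolean array:
    #   print 'Accuracy: ', np.mean(pred == Y_vec_train) * 100
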
    #plot the cost as a function of the training iteration
    x_vals = [idx for idx in range(training_steps)]
    y_vals = cost_vec
    plt.plot(x_vals, y_vals, 'r')
    plt.savefig("./cost_for_class_{i}.png".format(i=i))
    plt.show()
    plt.cla()
    plt.clf()

    # print 'Done with training for class ', i
    # print 'weights for class: ', i, w.get_value()
    # print 'bias term for class: ', i, b.get_value()

    #evaluate the trained hypothesis for this class over the training set
    hypo, pr = predict_class(X_train)
    # if i == 0:
    #     print 'Xtrain[0, :] : \n', [idx2 for idx2 in X_train[0, :]]
    #     print hypo
    #     print pr
    #     print '\ntrained b: ', b.get_value()
    #     print 'trained w: ', w.get_value()

    #store the weights and bias term for that particular class in column i
    W[:, i] = w.get_value()
    B[0, i] = b.get_value()

    time.sleep(2)

print 'Done with training the model!'

#save the parameters for later use
np.savez('./weights.npz', w=W)
np.savez('./bias.npz', b=B)
print 'Saved the parameters...'
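
#the saved parameters can be restored later with, e.g.:
#   W = np.load('./weights.npz')['w']
#   B = np.load('./bias.npz')['b']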


"""
PREDICT THE ACCURACY OVER THE TRAINING SET
"""
H = 1.0 / (1.0 + T.exp(-T.dot(x, W) - B))
pred_vec = T.argmax(H, axis=1)
predict = function(inputs=[x], outputs=[H, pred_vec])


print 'Predicting accuracy over the training set: '
Hypo, predicted = predict(X_train)
print 'hypo shape: ', Hypo.shape
print 'predicted shape: ', predicted.shape
print '\n\nHypo: ', Hypo[0:20, :]
print '\n\npredicted: ', predicted[0:20]
print '\n\ntarget: ', Y_train[0:20]

pred_count = 0
for target, _pred in zip(Y_train, predicted):
    # print 'predicted: ', _pred, ' Target: ', target
    if target == _pred:
        pred_count += 1
print 'Accuracy over the entire training set is: ', (pred_count / float(M)) * 100
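
#equivalent vectorised accuracy, assuming Y_train is a numpy int array:
#   print 'Accuracy: ', np.mean(np.asarray(Y_train) == predicted) * 100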