testBiRNNBatched.py
import numpy as np
import tensorflow as tf
import pandas as pd
import math
from gloveProject import loadWordVectors
#import matplotlib.pyplot as plt
import itertools
from BiRNNbatched import get_one_hot, get_all_vecs, chunker, get_batch
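
# `chunker` and `get_batch` come from BiRNNbatched.py. For reference, `chunker`
# is assumed to yield successive slices of at most `size` rows, along the lines
# of this illustrative sketch (not the actual implementation):
#
#     def chunker(seq, size):
#         return (seq[pos:pos + size] for pos in range(0, len(seq), size))
#
# From its usage below, `get_batch` is assumed to return, for each batch: a list
# of (1, length, gloveSize) GloVe matrices, a list of one-hot label rows, the
# integer star labels, and the longest review length in the batch (pad_until).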

batch_size = 64
gloveSize = 200  # dimensionality of the GloVe word vectors

def test(test_set):
    # Keep only the review text and star-rating columns.
    test_set = test_set.drop(["useful", "funny", "cool", "date", "review_id", "user_id", "business_id"], axis=1)
    print(len(test_set))
    with tf.Session() as sess:
        # Restore the trained BiRNN graph and its weights from the latest checkpoint.
        new_saver = tf.train.import_meta_graph('./experiment_logs/biRNN-10.meta')
        new_saver.restore(sess, tf.train.latest_checkpoint('./experiment_logs'))
        graph = tf.get_default_graph()
        summary_writer = tf.summary.FileWriter('logs', sess.graph)
        # Look up the tensors that were stored in collections at training time.
        inputs = tf.get_collection('inputs_placeholder')[0]
        labels = tf.get_collection('labels_placeholder')[0]
        sequence_lengths = tf.get_collection('sequence_lengths')[0]
        keep_prob = tf.get_collection('keep_prob')[0]
        logits = tf.get_collection('logits')[0]
        loss = tf.get_collection('loss')[0]
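        # These lookups assume the training script registered each tensor under
        # the matching collection name before saving, roughly like this
        # illustrative sketch (not the actual training code):
        #
        #     tf.add_to_collection('inputs_placeholder', inputs)
        #     tf.add_to_collection('sequence_lengths', sequence_lengths)
        #     tf.add_to_collection('keep_prob', keep_prob)
        #     tf.add_to_collection('logits', logits)
        #     tf.add_to_collection('loss', loss)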
        smoothed_cost_list = []
        correct_class = 0
        attempts = 0
        for batch in chunker(test_set, size=batch_size):
            inputs_batch, labels_batch, correct_labels, pad_until = get_batch(batch)
            # Per-review sequence lengths before padding; each element of
            # inputs_batch has shape (1, length, gloveSize).
            seq_lens = np.array([inputs_batch[k].shape[1] for k in range(len(inputs_batch))])
            # Zero-pad every review to the longest review in the batch, then
            # stack into a single (batch, pad_until, gloveSize) array.
            inputs_batch = [np.concatenate((inputs_batch[j], np.zeros((1, pad_until - inputs_batch[j].shape[1], gloveSize), np.float32)), axis=1) for j in range(len(inputs_batch))]
            inputs_batch = np.concatenate(inputs_batch, axis=0)
            labels_batch = np.concatenate(labels_batch, axis=0)
            # Dropout is disabled at test time (keep_prob=1.0).
            yhat, curr_loss = sess.run([logits, loss], feed_dict={inputs: inputs_batch, labels: labels_batch, sequence_lengths: seq_lens, keep_prob: 1.0})
            # argmax over the logits equals argmax over softmax(logits), so the
            # softmax can be skipped; computing it in NumPy also avoids adding
            # new ops to the graph on every batch. +1 maps the class index (0-4)
            # back to a star rating (1-5).
            pred = np.argmax(yhat, axis=1) + 1
            correct_class += np.sum(pred == np.array(correct_labels))
            attempts += inputs_batch.shape[0]  # the last batch may be smaller than batch_size
            smoothed_cost_list.append(curr_loss)
        smoothed_cost = float(sum(smoothed_cost_list)) / len(smoothed_cost_list)
        # Log the average test loss to TensorBoard.
        objectives_summary = tf.Summary()
        objectives_summary.value.add(tag='train_dev_smoothed_cost', simple_value=smoothed_cost)
        summary_writer.add_summary(objectives_summary)
        summary_writer.flush()
        print("Accuracy = " + str(float(correct_class) * 100 / attempts) + "%, Smoothed Cost = " + str(smoothed_cost))
        # Confusion matrix, precision, and recall (disabled). Requires
        # sklearn.metrics.confusion_matrix, matplotlib, and a plot_confusion_matrix
        # helper; note that correct_labels and pred only cover the final batch here.
        # class_names = ["1 star", "2 star", "3 star", "4 star", "5 star"]
        # class_names = [1, 2, 3, 4, 5]
        # confmat = confusion_matrix(correct_labels, pred, class_names)
        # np.set_printoptions(precision=2)
        # plt.figure()
        # plot_confusion_matrix(confmat, classes=class_names, normalize=False, title='Confusion matrix, without normalization')
        # plt.figure()
        # plot_confusion_matrix(confmat, classes=class_names, normalize=True, title='Normalized confusion matrix')
        # plt.show()  # commented out to avoid showing the matrix every time the code is run
        # precision = []
        # recall = []
        # for i in range(len(class_names)):
        #     pi = float(confmat[i][i]) / np.sum(confmat, axis=0)[i]  # column sums = predicted counts
        #     ri = float(confmat[i][i]) / np.sum(confmat, axis=1)[i]  # row sums = true counts
        #     precision.append(pi)
        #     recall.append(ri)
        # print("Precision: ", precision)
        # print("Recall: ", recall)
def main():
    yelp = pd.read_csv("newreviews.csv")
    # yelp_reduced = yelp.loc[0:499]
    # The train_dev set uses rows 700,000 to 710,000 of the reviews file.
    yelp_train_dev = yelp.loc[700000:709999]
    test(yelp_train_dev)

if __name__ == "__main__":
    main()
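
# Expected usage (assumes newreviews.csv and the ./experiment_logs checkpoint
# files from training are present in the working directory):
#   python testBiRNNBatched.py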