
Commit fe5f32e

committed Aug 1, 2018
initial commit

16 files changed: +1905 -0 lines changed
 

.gitignore

+3

*.pyc
*.jpg
*.png

MyConvLSTMCell.py

+58

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F


class MyConvLSTMCell(nn.Module):

    def __init__(self, input_size, hidden_size, kernel_size=3, stride=1, padding=1):
        super(MyConvLSTMCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

        # Input, forget, cell and output gates: each has an input-to-hidden and a
        # hidden-to-hidden convolution; the bias is kept only on the input branch.
        self.conv_i_xx = nn.Conv2d(input_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding)
        self.conv_i_hh = nn.Conv2d(hidden_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding,
                                   bias=False)

        self.conv_f_xx = nn.Conv2d(input_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding)
        self.conv_f_hh = nn.Conv2d(hidden_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding,
                                   bias=False)

        self.conv_c_xx = nn.Conv2d(input_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding)
        self.conv_c_hh = nn.Conv2d(hidden_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding,
                                   bias=False)

        self.conv_o_xx = nn.Conv2d(input_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding)
        self.conv_o_hh = nn.Conv2d(hidden_size, hidden_size, kernel_size=kernel_size, stride=stride, padding=padding,
                                   bias=False)

        # Xavier initialisation for the weights, zero for the biases
        # (xavier_normal/constant are the PyTorch 0.3.x names of these initialisers).
        torch.nn.init.xavier_normal(self.conv_i_xx.weight)
        torch.nn.init.constant(self.conv_i_xx.bias, 0)
        torch.nn.init.xavier_normal(self.conv_i_hh.weight)

        torch.nn.init.xavier_normal(self.conv_f_xx.weight)
        torch.nn.init.constant(self.conv_f_xx.bias, 0)
        torch.nn.init.xavier_normal(self.conv_f_hh.weight)

        torch.nn.init.xavier_normal(self.conv_c_xx.weight)
        torch.nn.init.constant(self.conv_c_xx.bias, 0)
        torch.nn.init.xavier_normal(self.conv_c_hh.weight)

        torch.nn.init.xavier_normal(self.conv_o_xx.weight)
        torch.nn.init.constant(self.conv_o_xx.bias, 0)
        torch.nn.init.xavier_normal(self.conv_o_hh.weight)

    def forward(self, x, state):
        # Lazily initialise the hidden and cell states. The hidden state must have
        # hidden_size channels (the spatial size is preserved by the padding).
        if state is None:
            state = (Variable(torch.randn(x.size(0), self.hidden_size, x.size(2), x.size(3)).cuda()),
                     Variable(torch.randn(x.size(0), self.hidden_size, x.size(2), x.size(3)).cuda()))
        ht_1, ct_1 = state
        it = F.sigmoid(self.conv_i_xx(x) + self.conv_i_hh(ht_1))
        ft = F.sigmoid(self.conv_f_xx(x) + self.conv_f_hh(ht_1))
        ct_tilde = F.tanh(self.conv_c_xx(x) + self.conv_c_hh(ht_1))
        ct = (ct_tilde * it) + (ct_1 * ft)
        ot = F.sigmoid(self.conv_o_xx(x) + self.conv_o_hh(ht_1))
        ht = ot * F.tanh(ct)
        return ht, ct
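
For reference, a minimal usage sketch of the cell (not part of the commit): it assumes 512-channel feature maps, matching the memSize of 512 used by the evaluation scripts, and a CUDA-capable machine, since the cell moves its lazily created state to the GPU.

```
# Hypothetical usage sketch for MyConvLSTMCell (illustration only, not in the repository).
import torch
from torch.autograd import Variable
from MyConvLSTMCell import MyConvLSTMCell

cell = MyConvLSTMCell(input_size=512, hidden_size=512).cuda()

# A toy sequence of 5 feature maps with shape (batch=1, channels=512, 7, 7).
seq = [Variable(torch.randn(1, 512, 7, 7).cuda()) for _ in range(5)]

state = None  # the cell initialises (h, c) itself on the first step
for x in seq:
    state = cell(x, state)

ht, ct = state
print(ht.size())  # torch.Size([1, 512, 7, 7])
```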

README.md

+66

# Object-centric Attention for Egocentric Activity Recognition

This repository contains the source code associated with our BMVC 2018 paper:
"Attention is All We Need: Nailing Down Object-centric Attention for Egocentric Activity Recognition".
The paper is available [here](https://arxiv.org/pdf/1807.11794.pdf).

#### Prerequisites

* Python 3.5
* PyTorch 0.3.1

*Training code will be released soon!*

#### **Evaluating the models**

* ##### **RGB**
  * ```
    python eval-run-rgb.py --dataset gtea61 \
    --datasetDir ./dataset/gtea_61/split2/test \
    --modelStateDict best_model_state_rgb.pth \
    --seqLen 25 \
    --memSize 512
    ```
* ##### **Flow**
  * ```
    python eval-run-flow.py --dataset gtea61 \
    --datasetDir ./dataset/gtea_61/split2/test \
    --modelStateDict best_model_state_flow.pth \
    --stackSize 5 \
    --numSegs 5
    ```
* ##### **Two Stream**
  * ```
    python eval-run-twoStream-joint.py --dataset gtea61 \
    --datasetDir ./dataset/gtea_61/split2/test \
    --modelStateDict best_model_state_twoStream.pth \
    --seqLen 25 \
    --stackSize 5 \
    --memSize 512
    ```

#### **Pretrained models**

The models trained on the fixed split (S2) of GTEA 61 can be downloaded from the following links:

* RGB model: [https://drive.google.com/open?id=1B7Xh6hQ9Py8fmL-pjmLzlCent6dnuex5](https://drive.google.com/open?id=1B7Xh6hQ9Py8fmL-pjmLzlCent6dnuex5 "RGB model")
* Flow model: [https://drive.google.com/open?id=1eG-ZF1IwOtYJqpIIeMASURB0uyCM_cFd](https://drive.google.com/open?id=1eG-ZF1IwOtYJqpIIeMASURB0uyCM_cFd "Flow model")
* Two stream model: [https://drive.google.com/open?id=11U5xbrOr8GtEhpkxY2lpPsyFDFJ8savp](https://drive.google.com/open?id=11U5xbrOr8GtEhpkxY2lpPsyFDFJ8savp "Two stream model")

The dataset can be downloaded from the following link:

[http://www.cbi.gatech.edu/fpv/](http://www.cbi.gatech.edu/fpv/)

Once the videos are downloaded, extract the frames and optical flow using the following implementation:

[https://github.com/yjxiong/dense_flow](https://github.com/yjxiong/dense_flow)

Run the `prepareGTEA61Dataset.py` script to prepare the dataset.

Alternatively, the frames and the corresponding warp optical flow of the GTEA 61 dataset can be downloaded from the following link:

* [https://drive.google.com/file/d/1_y8Y3PnCXsngmZVMqZbg-AfJyIdOeQ2_/view?usp=sharing](https://drive.google.com/file/d/1_y8Y3PnCXsngmZVMqZbg-AfJyIdOeQ2_/view?usp=sharing "GTEA61")

eval-run-flow.py

+95

from __future__ import print_function, division
from flow_resnet import *
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize)
from torch.autograd import Variable
from torch.utils.data.sampler import WeightedRandomSampler
from makeDatasetFlow import *
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import argparse
import sys


def main_run(dataset, model_state_dict, dataset_dir, stackSize, numSeg):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)

    spatial_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir, spatial_transform=spatial_transform, sequence=True,
                               numSeg=numSeg, stackSize=stackSize, fmt='.jpg', phase='Test')

    test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1,
                                              shuffle=False, num_workers=2, pin_memory=True)

    # Temporal stream: ResNet-34 taking a stack of 2*stackSize flow channels (x and y) as input.
    model = flow_resnet34(False, channels=2*stackSize, num_classes=num_classes)
    model.load_state_dict(torch.load(model_state_dict))
    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    model.cuda()
    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorr = 0
    true_labels = []
    predicted_labels = []

    for j, (inputs, targets) in enumerate(test_loader):
        inputVariable = Variable(inputs[0].cuda(), volatile=True)
        output_label, _ = model(inputVariable)
        # Average the class scores over the numSeg flow stacks of the video before taking the argmax.
        output_label_mean = torch.mean(output_label.data, 0, True)
        _, predicted = torch.max(output_label_mean, 1)
        numCorr += (predicted == targets[0]).sum()
        true_labels.append(targets)
        predicted_labels.append(predicted)
    test_accuracy = (numCorr / test_samples) * 100
    print('Test Accuracy = {}%'.format(test_accuracy))

    # Row-normalised confusion matrix over the classes.
    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

    ticks = np.linspace(0, 60, num=61)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-flow.jpg', bbox_inches='tight')
    plt.show()


def __main__():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='gtea61', help='Dataset')
    parser.add_argument('--datasetDir', type=str, default='./dataset/gtea_warped_flow_61/split2/test',
                        help='Dataset directory')
    parser.add_argument('--modelStateDict', type=str,
                        default='./models/gtea61/best_model_state_dict_flow_split2.pth',
                        help='Model path')
    parser.add_argument('--stackSize', type=int, default=5, help='Number of optical flow images in input')
    parser.add_argument('--numSegs', type=int, default=5, help='Number of flow stacks sampled per video')

    args = parser.parse_args()

    dataset = args.dataset
    model_state_dict = args.modelStateDict
    dataset_dir = args.datasetDir
    stackSize = args.stackSize
    numSegs = args.numSegs

    main_run(dataset, model_state_dict, dataset_dir, stackSize, numSegs)

__main__()
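
For clarity, the segment-averaging step used above in isolation: a toy sketch (random scores, not repository code) showing how class scores from the numSegs flow stacks of one video are averaged before the argmax.

```
# Illustration of the segment-averaging step in the flow evaluation (toy tensors only).
import torch

num_segs, num_classes = 5, 61
# Class scores for each of the 5 flow stacks sampled from one video.
segment_scores = torch.randn(num_segs, num_classes)

# Average the scores across the segments, then pick the highest-scoring class.
video_scores = torch.mean(segment_scores, 0, True)   # shape (1, 61)
_, predicted = torch.max(video_scores, 1)
print(predicted)
```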

eval-run-rgb.py

+91

from __future__ import print_function, division
from objectAttentionModelConvLSTM import *
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize)
from makeDatasetRGB import *
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import argparse
import sys


def main_run(dataset, model_state_dict, dataset_dir, seqLen, memSize):

    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               spatial_transform=spatial_transform,
                               seqLen=seqLen, fmt='.jpg')

    test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1,
                                              shuffle=False, num_workers=2, pin_memory=True)

    # Appearance stream: attention model with a ConvLSTM memory of size memSize.
    model = attentionModel(num_classes=num_classes, mem_size=memSize)
    model.load_state_dict(torch.load(model_state_dict))

    for params in model.parameters():
        params.requires_grad = False

    model.train(False)
    model.cuda()
    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorr = 0
    true_labels = []
    predicted_labels = []
    for j, (inputs, targets) in enumerate(test_loader):
        # The model expects the sequence dimension first: (seqLen, batch, channels, height, width).
        inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda(), volatile=True)
        output_label, _ = model(inputVariable)
        _, predicted = torch.max(output_label.data, 1)
        numCorr += (predicted == targets.cuda()).sum()
        true_labels.append(targets)
        predicted_labels.append(predicted)
    test_accuracy = (numCorr / test_samples) * 100
    print('Test Accuracy = {}%'.format(test_accuracy))

    # Row-normalised confusion matrix over the classes.
    cnf_matrix = confusion_matrix(true_labels, predicted_labels).astype(float)
    cnf_matrix_normalized = cnf_matrix / cnf_matrix.sum(axis=1)[:, np.newaxis]

    ticks = np.linspace(0, 60, num=61)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    plt.savefig(dataset + '-rgb.jpg', bbox_inches='tight')
    plt.show()


def __main__():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='gtea61', help='Dataset')
    parser.add_argument('--datasetDir', type=str, default='./dataset/gtea_warped_flow_61/split2/test',
                        help='Dataset directory')
    parser.add_argument('--modelStateDict', type=str, default='./models/gtea61/best_model_state_dict_rgb_split2.pth',
                        help='Model path')
    parser.add_argument('--seqLen', type=int, default=25, help='Length of sequence')
    parser.add_argument('--memSize', type=int, default=512, help='ConvLSTM hidden state size')

    args = parser.parse_args()

    dataset = args.dataset
    model_state_dict = args.modelStateDict
    dataset_dir = args.datasetDir
    seqLen = args.seqLen
    memSize = args.memSize

    main_run(dataset, model_state_dict, dataset_dir, seqLen, memSize)

__main__()
