Skip to content

Commit ec69538

Browse files
committed
Volumetric Average Pooling + doc + unit test, better performance for Volumetric Max Pooling
1 parent e35f09a commit ec69538

7 files changed

+519
-198
lines changed

VolumetricAveragePooling.lua

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
-- Declare the nn.VolumetricAveragePooling torch class with nn.Module as its
-- parent class; `parent` is used by the constructor below.
local VolumetricAveragePooling, parent = torch.class('nn.VolumetricAveragePooling', 'nn.Module')
3+
4+
-- Constructor.
--   kT, kW, kH : size of the pooling region (`kTxkWxkH`)
--   dT, dW, dH : step size (`dTxdWxdH`); each value that is nil or false
--                falls back to the corresponding kernel size
-- All six values are stored as fields of the module.
function VolumetricAveragePooling:__init(kT, kW, kH, dT, dW, dH)
   parent.__init(self)

   -- pooling region
   self.kT = kT
   self.kH = kH
   self.kW = kW

   -- step size, defaulting to the pooling region size
   self.dT = dT or kT
   self.dW = dW or kW
   self.dH = dH or kH
end
18+
19+
-- Forward pass: calls the VolumetricAveragePooling_updateOutput routine
-- looked up in `input.nn` with this module and the input, then returns
-- self.output.
function VolumetricAveragePooling:updateOutput(input)
   local backend = input.nn
   backend.VolumetricAveragePooling_updateOutput(self, input)
   return self.output
end
23+
24+
-- Backward pass: calls the VolumetricAveragePooling_updateGradInput routine
-- looked up in `input.nn` with this module, the input and gradOutput, then
-- returns self.gradInput.
function VolumetricAveragePooling:updateGradInput(input, gradOutput)
   local backend = input.nn
   backend.VolumetricAveragePooling_updateGradInput(self, input, gradOutput)
   return self.gradInput
end
28+
29+
-- Empties the module's buffers: gradInput first, then output. Each tensor is
-- resized with no dimensions and its storage is resized to 0 elements.
function VolumetricAveragePooling:empty()
   local gradInput = self.gradInput
   gradInput:resize()
   gradInput:storage():resize(0)

   local output = self.output
   output:resize()
   output:storage():resize(0)
end

doc/convolution.md

+12
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ a kernel for computing the weighted average in a neighborhood ;
2323
* [Volumetric Modules](#nn.VolumetricModules) apply to inputs with three-dimensional relationships (e.g. videos) :
2424
* [VolumetricConvolution](#nn.VolumetricConvolution) : a 3D convolution over an input video (a sequence of images) ;
2525
* [VolumetricMaxPooling](#nn.VolumetricMaxPooling) : a 3D max-pooling operation over an input video.
26+
* [VolumetricAveragePooling](#nn.VolumetricAveragePooling) : a 3D average-pooling operation over an input video.
2627

2728
<a name="nn.TemporalModules"/>
2829
## Temporal Modules ##
@@ -605,3 +606,14 @@ module = nn.VolumetricMaxPooling(kT, kW, kH [, dT, dW, dH])
605606
Applies 3D max-pooling operation in `kTxkWxkH` regions by step size
606607
`dTxdWxdH` steps. The number of output features is equal to the number of
607608
input planes / dT.
609+
610+
<a name="nn.VolumetricAveragePooling"/>
611+
### VolumetricAveragePooling ###
612+
613+
```lua
614+
module = nn.VolumetricAveragePooling(kT, kW, kH [, dT, dW, dH])
615+
```
616+
617+
Applies 3D average-pooling operation in `kTxkWxkH` regions with step size
618+
`dTxdWxdH`. The number of output planes is equal to the number of input
619+
planes; along each pooled dimension the output size is `floor((inputSize - kernelSize) / stepSize) + 1`.

generic/VolumetricAveragePooling.c

+263
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
#ifndef TH_GENERIC_FILE
2+
#define TH_GENERIC_FILE "generic/VolumetricAveragePooling.c"
3+
#else
4+
5+
/*
 * Average-pools `nslices` volumes for one frame.
 *
 * input_p  : nslices volumes stored back to back, each indexed as
 *            [itime][iheight][iwidth] with width varying fastest
 * output_p : nslices volumes stored back to back, each indexed as
 *            [otime][oheight][owidth]; every element is overwritten
 * kT/kW/kH : extent of the pooling window along time / width / height
 * dT/dW/dH : step between consecutive windows along time / width / height
 *
 * The caller must make sure every window lies inside the input volume; no
 * bounds are checked here. The loop over slices is an OpenMP parallel for.
 */
static void nn_(VolumetricAveragePooling_updateOutput_frame)(
  real *input_p, real *output_p, long nslices,
  long itime, long iwidth, long iheight,
  long otime, long owidth, long oheight,
  int kT, int kW, int kH, int dT, int dW, int dH) {
  long k;
#pragma omp parallel for private(k)
  for (k = 0; k < nslices; k++) {
    /* start of the k-th input / output volume */
    real *in_slice = input_p + k * itime * iwidth * iheight;
    real *out_slice = output_p + k * otime * owidth * oheight;
    long ot, oh, ow;

    /* visit every output cell */
    for (ot = 0; ot < otime; ot++) {
      for (oh = 0; oh < oheight; oh++) {
        for (ow = 0; ow < owidth; ow++) {
          /* first input element covered by this cell's pooling window */
          real *window = in_slice
            + ot * iwidth * iheight * dT + oh * iwidth * dH + ow * dW;
          real acc = 0.0;
          int kt, kh, kw;

          /* accumulate the window: time outermost, width innermost */
          for (kt = 0; kt < kT; kt++) {
            for (kh = 0; kh < kH; kh++) {
              real *row = window + kt * iwidth * iheight + kh * iwidth;
              for (kw = 0; kw < kW; kw++) {
                acc += row[kw];
              }
            }
          }

          /* store the window average */
          out_slice[ot * owidth * oheight + oh * owidth + ow] =
            acc / (kT * kW * kH);
        }
      }
    }
  }
}
43+
44+
/*
 * Lua entry point for the forward pass.
 *
 * Stack arguments:
 *   1 : the module; its fields kT, kW, kH, dT, dW, dH (integers) and
 *       `output` (tensor) are read
 *   2 : the input tensor, 4D (slices x time x height x width) or
 *       5D (batch x slices x time x height x width)
 *
 * `output` is resized to slices x otime x oheight x owidth (with a leading
 * batch dimension in batch mode), where each o* is (i* - k*) / d* + 1, and
 * filled with the window averages.
 *
 * Raises a Lua argument error when the input is not 4D/5D, when a kernel or
 * step size is not positive, or when the input is smaller than the kernel.
 *
 * Returns 1, i.e. one value is handed back to Lua (presumably the `output`
 * field left on the stack by luaT_getfieldcheckudata -- TODO confirm).
 */
static int nn_(VolumetricAveragePooling_updateOutput)(lua_State *L) {
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int kT = luaT_getfieldcheckint(L, 1, "kT");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dT = luaT_getfieldcheckint(L, 1, "dT");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  long nslices;
  long itime;
  long iheight;
  long iwidth;
  long otime;
  long oheight;
  long owidth;
  real *input_data;
  real *output_data;

  /* dimension indices for non-batch input; shifted by one in batch mode */
  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2,
                "4D or 5D (batch-mode) tensor expected");

  /* A zero step would divide by zero in the output-size computation below
     and a zero kernel would divide by zero in the average, so reject
     non-positive values before anything is allocated. */
  luaL_argcheck(L, kT > 0 && kW > 0 && kH > 0 &&
                dT > 0 && dW > 0 && dH > 0, 1,
                "kernel size and step size must be positive");

  if (input->nDimension == 5) {
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH &&
                input->size[dimt] >= kT, 2,
                "input image smaller than kernel size");

  /* sizes */
  nslices = input->size[dimN];
  itime   = input->size[dimt];
  iheight = input->size[dimh];
  iwidth  = input->size[dimw];
  otime   = (itime   - kT) / dT + 1;
  oheight = (iheight - kH) / dH + 1;
  owidth  = (iwidth  - kW) / dW + 1;

  /* get contiguous input; released at the end of this function */
  input = THTensor_(newContiguous)(input);

  if (input->nDimension == 4) { /* non-batch mode */
    /* resize output */
    THTensor_(resize4d)(output, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);

    nn_(VolumetricAveragePooling_updateOutput_frame)(input_data, output_data,
                                                     nslices,
                                                     itime, iwidth, iheight,
                                                     otime, owidth, oheight,
                                                     kT, kW, kH, dT, dW, dH);
  } else { /* batch mode */
    long p;
    long nBatch = input->size[0];

    /* number of elements in one batch entry of the input / output */
    long istride = nslices * itime * iwidth * iheight;
    long ostride = nslices * otime * owidth * oheight;

    /* resize output */
    THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);

#pragma omp parallel for private(p)
    for (p = 0; p < nBatch; p++) {
      nn_(VolumetricAveragePooling_updateOutput_frame)(
        input_data + p * istride, output_data + p * ostride,
        nslices, itime, iwidth, iheight, otime, owidth, oheight,
        kT, kW, kH, dT, dW, dH);
    }
  }

  /* cleanup */
  THTensor_(free)(input);
  return 1;
}
132+
133+
/*
 * Back-propagates average pooling for one frame.
 *
 * gradInput_p  : nslices volumes stored back to back, each indexed as
 *                [itime][iheight][iwidth]; values are accumulated with +=,
 *                so the caller must initialise the buffer
 * gradOutput_p : nslices volumes stored back to back, each indexed as
 *                [otime][oheight][owidth]
 * kT/kW/kH     : extent of the pooling window along time / width / height
 * dT/dW/dH     : step between consecutive windows along time / width / height
 *
 * Each output gradient is divided by the window volume (kT * kW * kH) and
 * added to every input position of its window. No bounds are checked here.
 * The loop over slices is an OpenMP parallel for.
 */
static void nn_(VolumetricAveragePooling_updateGradInput_frame)(
  real *gradInput_p, real *gradOutput_p, long nslices,
  long itime, long iwidth, long iheight,
  long otime, long owidth, long oheight,
  int kT, int kW, int kH, int dT, int dW, int dH) {
  long k;
#pragma omp parallel for private(k)
  for (k = 0; k < nslices; k++) {
    /* start of the k-th gradInput / gradOutput volume */
    real *gin_slice = gradInput_p + k * itime * iwidth * iheight;
    real *gout_slice = gradOutput_p + k * otime * owidth * oheight;
    long ot, oh, ow;

    /* visit every output cell */
    for (ot = 0; ot < otime; ot++) {
      for (oh = 0; oh < oheight; oh++) {
        for (ow = 0; ow < owidth; ow++) {
          /* first gradInput element covered by this cell's window */
          real *window = gin_slice
            + ot * iwidth * iheight * dT + oh * iwidth * dH + ow * dW;

          /* equal share of the output gradient for each window element */
          real share = gout_slice[ot * owidth * oheight + oh * owidth + ow]
            / (kT * kW * kH);
          int kt, kh, kw;

          /* scatter the share over the window footprint */
          for (kt = 0; kt < kT; kt++) {
            for (kh = 0; kh < kH; kh++) {
              real *row = window + kt * iwidth * iheight + kh * iwidth;
              for (kw = 0; kw < kW; kw++) {
                row[kw] += share;
              }
            }
          }
        }
      }
    }
  }
}
167+
168+
/*
 * Lua entry point for the backward pass.
 *
 * Stack arguments:
 *   1 : the module; its fields dT, dW, dH, kT, kW, kH (integers) and
 *       `gradInput` (tensor) are read
 *   2 : the input tensor, 4D or 5D (batch mode)
 *   3 : the gradOutput tensor; its time/height/width sizes are taken as the
 *       output sizes
 *
 * `gradInput` is resized like `input`, zeroed, and then filled by scattering
 * each output gradient evenly over its pooling window.
 *
 * NOTE(review): unlike updateOutput, nothing here checks the dimensionality
 * of `input` or that `gradOutput` has the matching shape -- callers are
 * assumed to pass tensors from a preceding forward pass; confirm.
 *
 * Returns 1, i.e. one value is handed back to Lua (presumably the
 * `gradInput` field left on the stack by luaT_getfieldcheckudata -- TODO
 * confirm).
 */
static int nn_(VolumetricAveragePooling_updateGradInput)(lua_State *L) {
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int dT = luaT_getfieldcheckint(L, 1, "dT");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kT = luaT_getfieldcheckint(L, 1, "kT");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput",
                                                torch_Tensor);
  /* Sizes are kept as long, like in updateOutput and in the frame routine's
     parameters, so the stride products below are evaluated in long
     arithmetic instead of overflowing in int. */
  long nslices;
  long itime;
  long iheight;
  long iwidth;
  long otime;
  long oheight;
  long owidth;
  real *gradInput_data;
  real *gradOutput_data;

  /* dimension indices for non-batch input; shifted by one in batch mode */
  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  /* get contiguous gradOutput; released at the end of this function */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize and clear: the frame routine accumulates into gradInput */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (input->nDimension == 5) {
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  /* sizes */
  nslices = input->size[dimN];
  itime = input->size[dimt];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  otime = gradOutput->size[dimt];
  oheight = gradOutput->size[dimh];
  owidth = gradOutput->size[dimw];

  /* get raw pointers */
  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);

  /* backprop */
  if (input->nDimension == 4) { /* non-batch mode */
    nn_(VolumetricAveragePooling_updateGradInput_frame)(
      gradInput_data, gradOutput_data, nslices,
      itime, iwidth, iheight, otime, owidth, oheight,
      kT, kW, kH, dT, dW, dH);
  } else { /* batch mode */
    long p;
    long nBatch = input->size[0];

    /* number of elements in one batch entry of gradInput / gradOutput */
    long istride = nslices * itime * iwidth * iheight;
    long ostride = nslices * otime * owidth * oheight;

#pragma omp parallel for private(p)
    for (p = 0; p < nBatch; p++) {
      nn_(VolumetricAveragePooling_updateGradInput_frame)(
        gradInput_data + p * istride, gradOutput_data + p * ostride, nslices,
        itime, iwidth, iheight, otime, owidth, oheight,
        kT, kW, kH, dT, dW, dH);
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);
  return 1;
}
248+
249+
static const struct luaL_Reg nn_(VolumetricAveragePooling__) [] = {
250+
{"VolumetricAveragePooling_updateOutput",
251+
nn_(VolumetricAveragePooling_updateOutput)},
252+
{"VolumetricAveragePooling_updateGradInput",
253+
nn_(VolumetricAveragePooling_updateGradInput)},
254+
{NULL, NULL}
255+
};
256+
257+
/*
 * Registers this module's function table with Lua: pushes the metatable of
 * torch_Tensor, registers the table under the name "nn" via
 * luaT_registeratname, then pops one value off the stack.
 */
static void nn_(VolumetricAveragePooling_init)(lua_State *L)
{
  luaT_pushmetatable(L, torch_Tensor);
  luaT_registeratname(L, nn_(VolumetricAveragePooling__), "nn");
  lua_pop(L, 1);
}
262+
263+
#endif

0 commit comments

Comments
 (0)