sprinkle functions with basic documentation

lisurui6 · Mar 15, 2014 · 51511d0 · 51511d0
1 parent ef30790
commit 51511d0
Show file tree

Hide file tree

Showing 18 changed files with 164 additions and 30 deletions.
diff --git a/bbox_regression/rcnn_predict_bbox_regressor.m b/bbox_regression/rcnn_predict_bbox_regressor.m
@@ -1,7 +1,8 @@
 function pred_boxes = ...
     rcnn_predict_bbox_regressor(model, feat, ex_boxes)
-% rcnn_predict_bbox_regressor - compute predicted bounding box
-%   pred_boxes = rcnn_predict_bbox_regressor(model, feat, ex_boxes)
+% pred_boxes = rcnn_predict_bbox_regressor(model, feat, ex_boxes)
+%   Predicts a new bounding box from CNN features computed on input
+%   bounding boxes.
 %   
 %   Inputs
 %   model     Bounding box regressor from rcnn_train_bbox_regressor.m

diff --git a/bbox_regression/rcnn_train_bbox_regressor.m b/bbox_regression/rcnn_train_bbox_regressor.m
@@ -1,4 +1,17 @@
 function bbox_reg = rcnn_train_bbox_regressor(imdb, rcnn_model, varargin)
+% bbox_reg = rcnn_train_bbox_regressor(imdb, rcnn_model, varargin)
+%   Trains a bounding box regressor on the image database imdb
+%   for use with the R-CNN model rcnn_model. The regressor is trained
+%   using ridge regression.
+%
+%   Keys that can be passed in:
+%
+%   min_overlap     Proposal boxes with this much overlap or more are used
+%   layer           The CNN layer features to regress from (either 5, 6 or 7)
+%   lambda          The regularization hyperparameter in ridge regression
+%   robust          Throw away examples with loss in the top [robust]-quantile
+%   binarize        Binarize features or leave as real values >= 0
+
 % AUTORIGHTS
 % ---------------------------------------------------------
 % Copyright (c) 2014, Ross Girshick

diff --git a/experiments/rcnn_exp_bbox_reg_train_and_test.m b/experiments/rcnn_exp_bbox_reg_train_and_test.m
@@ -1,4 +1,6 @@
 function res = rcnn_exp_bbox_reg_train_and_test()
+% Runs an experiment that trains a bounding box regressor and
+% tests it.
 
 % change to point to your VOCdevkit install
 VOCdevkit = './datasets/VOCdevkit2007';

diff --git a/finetuning/rcnn_make_window_file.m b/finetuning/rcnn_make_window_file.m
@@ -1,6 +1,18 @@
 function rcnn_make_window_file(imdb, out_dir)
-% Makes a window file that can be used by the caffe WindowDataLayer for
-% finetuning.
+% rcnn_make_window_file(imdb, out_dir)
+%   Makes a window file that can be used by the caffe WindowDataLayer 
+%   for finetuning.
+%
+%   The window file format contains repeated blocks of:
+%
+%     # image_index 
+%     img_path
+%     channels 
+%     height 
+%     width
+%     num_windows
+%     class_index overlap x1 y1 x2 y2
+%     <... num_windows-1 more windows follow ...>
 
 % AUTORIGHTS
 % ---------------------------------------------------------
@@ -14,15 +26,6 @@ function rcnn_make_window_file(imdb, out_dir)
 
 roidb = imdb.roidb_func(imdb);
 
-% window_file format
-%  # image_index 
-%  img_path
-%  channels 
-%  height 
-%  width
-%  num_windows
-%  class_index overlap x1 y1 x2 y2
-
 window_file = sprintf('%s/window_file_%s.txt', ...
     out_dir, imdb.name);
 fid = fopen(window_file, 'wt');

diff --git a/imdb/imdb_eval_voc.m b/imdb/imdb_eval_voc.m
@@ -1,4 +1,19 @@
 function res = imdb_eval_voc(cls, boxes, imdb, suffix)
+% res = imdb_eval_voc(cls, boxes, imdb, suffix)
+%   Use the VOCdevkit to evaluate detections specified in boxes
+%   for class cls against the ground-truth boxes in the image
+%   database imdb. Results files are saved with an optional
+%   suffix.
+
+% AUTORIGHTS
+% ---------------------------------------------------------
+% Copyright (c) 2014, Ross Girshick
+% 
+% This file is part of the R-CNN code and is available 
+% under the terms of the Simplified BSD License provided in 
+% LICENSE. Please retain this notice and LICENSE if you use 
+% this file (or any portion of it) in your project.
+% ---------------------------------------------------------
 
 % Add a random string ("salt") to the end of the results file name
 % to prevent concurrent evaluations from clobbering each other

diff --git a/imdb/imdb_from_voc.m b/imdb/imdb_from_voc.m
@@ -1,4 +1,19 @@
 function imdb = imdb_from_voc(root_dir, image_set, year)
+% imdb = imdb_from_voc(root_dir, image_set, year)
+%   Builds an image database for the PASCAL VOC devkit located
+%   at root_dir using the image_set and year.
+%
+%   Inspired by Andrea Vedaldi's MKL imdb and roidb code.
+
+% AUTORIGHTS
+% ---------------------------------------------------------
+% Copyright (c) 2014, Ross Girshick
+% 
+% This file is part of the R-CNN code and is available 
+% under the terms of the Simplified BSD License provided in 
+% LICENSE. Please retain this notice and LICENSE if you use 
+% this file (or any portion of it) in your project.
+% ---------------------------------------------------------
 
 %imdb.name = 'voc_train_2007'
 %imdb.image_dir = '/work4/rbg/VOC2007/VOCdevkit/VOC2007/JPEGImages/'

diff --git a/imdb/roidb_from_voc.m b/imdb/roidb_from_voc.m
@@ -1,4 +1,20 @@
 function roidb = roidb_from_voc(imdb)
+% roidb = roidb_from_voc(imdb)
+%   Builds a regions-of-interest database from the imdb image
+%   database. Uses precomputed selective search boxes available
+%   in the R-CNN data package.
+%
+%   Inspired by Andrea Vedaldi's MKL imdb and roidb code.
+
+% AUTORIGHTS
+% ---------------------------------------------------------
+% Copyright (c) 2014, Ross Girshick
+% 
+% This file is part of the R-CNN code and is available 
+% under the terms of the Simplified BSD License provided in 
+% LICENSE. Please retain this notice and LICENSE if you use 
+% this file (or any portion of it) in your project.
+% ---------------------------------------------------------
 
 cache_file = ['./imdb/cache/roidb_' imdb.name];
 try

diff --git a/rcnn_cache_pool5_features.m b/rcnn_cache_pool5_features.m
@@ -1,4 +1,18 @@
 function rcnn_cache_pool5_features(imdb, varargin)
+% rcnn_cache_pool5_features(imdb, varargin)
+%   Computes pool5 features and saves them to disk. We compute
+%   pool5 features because we can easily compute fc6 and fc7
+%   features from them on-the-fly and they tend to compress better
+%   than fc6 or fc7 features due to greater sparsity.
+%
+%   Keys that can be passed in:
+%
+%   start             Index of the first image in imdb to process
+%   end               Index of the last image in imdb to process
+%   crop_mode         Crop mode (either 'warp' or 'square')
+%   crop_padding      Amount of padding in crop
+%   net_file          Path to the Caffe CNN to use
+%   cache_name        Path to the precomputed feature cache
 
 % AUTORIGHTS
 % ---------------------------------------------------------
@@ -10,7 +24,6 @@ function rcnn_cache_pool5_features(imdb, varargin)
 % this file (or any portion of it) in your project.
 % ---------------------------------------------------------
 
-
 ip = inputParser;
 ip.addRequired('imdb', @isstruct);
 ip.addOptional('start', 1, @isscalar);

diff --git a/rcnn_config.m b/rcnn_config.m
@@ -31,15 +31,15 @@
 % ---------------------------------------------------------
 
 % ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-% Defaults config
+% Defaults config (override in rcnn_config_local.m)
 % ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-% 
-% If you want to override any of these, create a script named 
-% rcnn_config_local.m and redefine these variables there.
-%
+% If you want to override any of these, create a **script** 
+% named rcnn_config_local.m and redefine these variables there.
+
 % Experiments directory. The directory under which most outputs
 % generated by running this code will go.
 EXP_DIR = './cachedir';
+
 % Set to false if you do not want to use a GPU.
 USE_GPU = true;
 

diff --git a/rcnn_extract_regions.m b/rcnn_extract_regions.m
@@ -1,9 +1,15 @@
 function [batches, batch_padding] = rcnn_extract_regions(im, boxes, rcnn_model)
-% Extract image regions and preprocess them for use in caffe.
-% Output is a cell array of batches.
-% Each batch is a 4-D tensor formatted for input into caffe
-% (format: BGR channel order; single precision; mean subtracted;
-%  dimensions from fastest to slowest: width, height, channels, batch_index)
+% [batches, batch_padding] = rcnn_extract_regions(im, boxes, rcnn_model)
+%   Extract image regions and preprocess them for use in Caffe.
+%   Output is a cell array of batches.
+%   Each batch is a 4-D tensor formatted for input into Caffe:
+%     - BGR channel order
+%     - single precision
+%     - mean subtracted
+%     - dimensions from fastest to slowest: width, height, channel, batch_index
+%
+%   im is an image in RGB order as returned by imread
+%   boxes are in [x1 y1 x2 y2] format with one box per row
 
 % AUTORIGHTS
 % ---------------------------------------------------------

diff --git a/rcnn_features.m b/rcnn_features.m
@@ -1,4 +1,11 @@
 function feat = rcnn_features(im, boxes, rcnn_model)
+% feat = rcnn_features(im, boxes, rcnn_model)
+%   Compute CNN features on a set of boxes.
+%
+%   im is an image in RGB order as returned by imread
+%   boxes are in [x1 y1 x2 y2] format with one box per row
+%   rcnn_model specifies the CNN Caffe net file to use.
+
 % AUTORIGHTS
 % ---------------------------------------------------------
 % Copyright (c) 2014, Ross Girshick

diff --git a/rcnn_im_crop.m b/rcnn_im_crop.m
@@ -1,4 +1,17 @@
-function window = rcnn_im_crop(im, bbox, crop_mode, crop_size, padding, image_mean)
+function window = ...
+    rcnn_im_crop(im, bbox, crop_mode, crop_size, padding, image_mean)
+% window = rcnn_im_crop(im, bbox, crop_mode, crop_size, padding, image_mean)
+%   Crops a window specified by bbox (in [x1 y1 x2 y2] order) out of im.
+%
+%   crop_mode can be either 'warp' or 'square'
+%   crop_size determines the size of the output window: crop_size x crop_size
+%   padding is the amount of padding to include at the target scale
+%   image_mean is the mean to subtract from the cropped window
+%
+%   N.B. this should be as identical as possible to the cropping 
+%   implementation in Caffe's WindowDataLayer, which is used while
+%   fine-tuning.
+
 % AUTORIGHTS
 % ---------------------------------------------------------
 % Copyright (c) 2014, Ross Girshick
@@ -66,6 +79,8 @@
 end % padding > 0 || square
 
 window = im(bbox(2):bbox(4), bbox(1):bbox(3), :);
+% We turn off antialiasing to better match OpenCV's bilinear 
+% interpolation that is used in Caffe's WindowDataLayer.
 tmp = imresize(window, [crop_height crop_width], ...
     'bilinear', 'antialiasing', false);
 if ~isempty(image_mean)

diff --git a/rcnn_load_cached_pool5_features.m b/rcnn_load_cached_pool5_features.m
@@ -1,5 +1,7 @@
 function d = rcnn_load_cached_pool5_features(cache_name, imdb_name, id)
-% loads feat_cache/[cache_name]/[split]/[id].mat
+% d = rcnn_load_cached_pool5_features(cache_name, imdb_name, id)
+%   Loads cached pool5 features from:
+%   feat_cache/[cache_name]/[imdb_name]/[id].mat
 
 % AUTORIGHTS
 % ---------------------------------------------------------

diff --git a/rcnn_load_model.m b/rcnn_load_model.m
@@ -1,4 +1,11 @@
 function rcnn_model = rcnn_load_model(rcnn_model_or_file, use_gpu)
+% rcnn_model = rcnn_load_model(rcnn_model_or_file, use_gpu)
+%   Takes an rcnn_model structure and loads the associated Caffe
+%   CNN into memory. Since this is nasty global state that is carried
+%   around, a randomly generated 'key' (or handle) is returned.
+%   Before making calls to caffe it's a good idea to check that
+%   rcnn_model.cnn.key is the same as caffe('get_init_key').
+
 % AUTORIGHTS
 % ---------------------------------------------------------
 % Copyright (c) 2014, Ross Girshick

diff --git a/rcnn_pool5_to_fcX.m b/rcnn_pool5_to_fcX.m
@@ -1,4 +1,8 @@
 function feat = rcnn_pool5_to_fcX(feat, layer, rcnn_model)
+% feat = rcnn_pool5_to_fcX(feat, layer, rcnn_model)
+%   On-the-fly conversion of pool5 features to fc6 or fc7
+%   using the weights and biases stored in rcnn_model.cnn.layers.
+
 % AUTORIGHTS
 % ---------------------------------------------------------
 % Copyright (c) 2014, Ross Girshick

diff --git a/rcnn_scale_features.m b/rcnn_scale_features.m
@@ -1,5 +1,4 @@
 function f = rcnn_scale_features(f, feat_norm_mean)
-
 % My initial experiments were conducted on features with an average norm
 % very close to 20. Using those features, I determined a good range of SVM
 % C values to cross-validate over. Features from different layers end up

diff --git a/rcnn_test.m b/rcnn_test.m
@@ -1,4 +1,9 @@
 function res = rcnn_test(rcnn_model, imdb, suffix)
+% res = rcnn_test(rcnn_model, imdb, suffix)
+%   Compute test results using the trained rcnn_model on the
+%   image database specified by imdb. Results are saved
+%   with an optional suffix.
+
 % AUTORIGHTS
 % ---------------------------------------------------------
 % Copyright (c) 2014, Ross Girshick

diff --git a/rcnn_train.m b/rcnn_train.m
@@ -1,8 +1,20 @@
 function [rcnn_model, rcnn_k_fold_model] = ...
     rcnn_train(imdb, varargin)
-% rcnn_model = rcnn_train(imdb, varargin)
+% [rcnn_model, rcnn_k_fold_model] = rcnn_train(imdb, varargin)
+%   Trains an R-CNN detector for all classes in the imdb.
+%   
+%   Keys that can be passed in:
 %
-% Train all classes at once.
+%   svm_C             SVM regularization parameter
+%   bias_mult         Bias feature value (for liblinear)
+%   pos_loss_weight   Cost factor on hinge loss for positives
+%   layer             Feature layer to use (either 5, 6 or 7)
+%   k_folds           Train on folds of the imdb
+%   checkpoint        Save the rcnn_model every [checkpoint] images
+%   crop_mode         Crop mode (either 'warp' or 'square')
+%   crop_padding      Amount of padding in crop
+%   net_file          Path to the Caffe CNN to use
+%   cache_name        Path to the precomputed feature cache
 
 % AUTORIGHTS
 % ---------------------------------------------------------
@@ -14,7 +26,6 @@
 % this file (or any portion of it) in your project.
 % ---------------------------------------------------------
 
-
 % TODO:
 %  - allow training just a subset of classes