BVLC · Bensonlp · Feb 3, 2016 · Feb 3, 2016 · Feb 4, 2016 · Feb 4, 2016
diff --git a/.gitignore b/.gitignore
@@ -96,3 +96,8 @@ LOCK
 LOG*
 CURRENT
 MANIFEST-*
+
+# temporary directories
+jobs
+temp
+examples/*/*lmdb
diff --git a/LICENSE b/LICENSE
@@ -1,5 +1,11 @@
 COPYRIGHT
 
+All new contributions compared to the original branch:
+Copyright (c) 2015, 2016 Wei Liu (UNC Chapel Hill), Dragomir Anguelov (Zoox),
+Dumitru Erhan (Google), Christian Szegedy (Google), Scott Reed (UMich Ann Arbor),
+Cheng-Yang Fu (UNC Chapel Hill), Alexander C. Berg (UNC Chapel Hill).
+All rights reserved.
+
 All contributions by the University of California:
 Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
 All rights reserved.

diff --git a/Makefile b/Makefile
@@ -178,7 +178,7 @@ ifneq ($(CPU_ONLY), 1)
 	LIBRARIES := cudart cublas curand
 endif
 
-LIBRARIES += glog gflags protobuf boost_system boost_filesystem m hdf5_hl hdf5
+LIBRARIES += glog gflags protobuf boost_system boost_filesystem boost_regex m hdf5_hl hdf5
 
 # handle IO dependencies
 USE_LEVELDB ?= 1
@@ -195,7 +195,7 @@ ifeq ($(USE_OPENCV), 1)
 	LIBRARIES += opencv_core opencv_highgui opencv_imgproc
 
 	ifeq ($(OPENCV_VERSION), 3)
-		LIBRARIES += opencv_imgcodecs
+		LIBRARIES += opencv_imgcodecs opencv_videoio
 	endif
 
 endif
@@ -404,7 +404,7 @@ LIBRARY_DIRS += $(LIB_BUILD_DIR)
 CXXFLAGS += -MMD -MP
 
 # Complete build flags.
-COMMON_FLAGS += $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
+COMMON_FLAGS += $(foreach includedir,$(INCLUDE_DIRS),-isystem $(includedir))
 CXXFLAGS += -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS)
 NVCCFLAGS += -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
 # mex may invoke an older gcc that is too liberal with -Wuninitalized

diff --git a/Makefile.config.example b/Makefile.config.example
@@ -31,19 +31,21 @@ CUDA_DIR := /usr/local/cuda
 # CUDA_DIR := /usr
 
 # CUDA architecture setting: going with all of them.
-# For CUDA < 6.0, comment the *_50 lines for compatibility.
+# For CUDA < 6.0, comment the lines after *_35 for compatibility.
 CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \
-		-gencode arch=compute_20,code=sm_21 \
-		-gencode arch=compute_30,code=sm_30 \
-		-gencode arch=compute_35,code=sm_35 \
-		-gencode arch=compute_50,code=sm_50 \
-		-gencode arch=compute_50,code=compute_50
+             -gencode arch=compute_20,code=sm_21 \
+             -gencode arch=compute_30,code=sm_30 \
+             -gencode arch=compute_35,code=sm_35 \
+             -gencode arch=compute_50,code=sm_50 \
+             -gencode arch=compute_52,code=sm_52 \
+             -gencode arch=compute_61,code=sm_61
 
 # BLAS choice:
 # atlas for ATLAS (default)
 # mkl for MKL
 # open for OpenBlas
-BLAS := atlas
+# BLAS := atlas
+BLAS := open
 # Custom (MKL/ATLAS/OpenBLAS) include and lib directories.
 # Leave commented to accept the defaults for your choice of BLAS
 # (which should work)!
@@ -65,10 +67,10 @@ PYTHON_INCLUDE := /usr/include/python2.7 \
 		/usr/lib/python2.7/dist-packages/numpy/core/include
 # Anaconda Python distribution is quite popular. Include path:
 # Verify anaconda location, sometimes it's in root.
-# ANACONDA_HOME := $(HOME)/anaconda
+# ANACONDA_HOME := $(HOME)/anaconda2
 # PYTHON_INCLUDE := $(ANACONDA_HOME)/include \
-		# $(ANACONDA_HOME)/include/python2.7 \
-		# $(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include \
+		$(ANACONDA_HOME)/include/python2.7 \
+		$(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include \
 
 # Uncomment to use Python 3 (default is Python 2)
 # PYTHON_LIBRARIES := boost_python3 python3.5m

diff --git a/README.md b/README.md
@@ -1,37 +1,143 @@
-# Caffe
+# SSD: Single Shot MultiBox Detector
 
-[![Build Status](https://travis-ci.org/BVLC/caffe.svg?branch=master)](https://travis-ci.org/BVLC/caffe)
+[![Build Status](https://travis-ci.org/weiliu89/caffe.svg?branch=ssd)](https://travis-ci.org/weiliu89/caffe)
 [![License](https://img.shields.io/badge/license-BSD-blue.svg)](LICENSE)
 
-Caffe is a deep learning framework made with expression, speed, and modularity in mind.
-It is developed by the Berkeley Vision and Learning Center ([BVLC](http://bvlc.eecs.berkeley.edu)) and community contributors.
+By [Wei Liu](http://www.cs.unc.edu/~wliu/), [Dragomir Anguelov](https://www.linkedin.com/in/dragomiranguelov), [Dumitru Erhan](http://research.google.com/pubs/DumitruErhan.html), [Christian Szegedy](http://research.google.com/pubs/ChristianSzegedy.html), [Scott Reed](http://www-personal.umich.edu/~reedscot/), [Cheng-Yang Fu](http://www.cs.unc.edu/~cyfu/), [Alexander C. Berg](http://acberg.com).
 
-Check out the [project site](http://caffe.berkeleyvision.org) for all the details like
+### Introduction
 
-- [DIY Deep Learning for Vision with Caffe](https://docs.google.com/presentation/d/1UeKXVgRvvxg9OUdh_UiC5G71UMscNPlvArsWER41PsU/edit#slide=id.p)
-- [Tutorial Documentation](http://caffe.berkeleyvision.org/tutorial/)
-- [BVLC reference models](http://caffe.berkeleyvision.org/model_zoo.html) and the [community model zoo](https://github.com/BVLC/caffe/wiki/Model-Zoo)
-- [Installation instructions](http://caffe.berkeleyvision.org/installation.html)
+SSD is a unified framework for object detection with a single network. You can use the code to train/evaluate a network for object detection tasks. For more details, please refer to our [arXiv paper](http://arxiv.org/abs/1512.02325) and our [slides](http://www.cs.unc.edu/~wliu/papers/ssd_eccv2016_slide.pdf).
 
-and step-by-step examples.
+<p align="center">
+<img src="http://www.cs.unc.edu/~wliu/papers/ssd.png" alt="SSD Framework" width="600px">
+</p>
 
-[![Join the chat at https://gitter.im/BVLC/caffe](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/BVLC/caffe?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+| System | VOC2007 test *mAP* | **FPS** (Titan X) | Number of Boxes | Input resolution |
+|:-------|:-----:|:-------:|:-------:|:-------:|
+| [Faster R-CNN (VGG16)](https://github.com/ShaoqingRen/faster_rcnn) | 73.2 | 7 | ~6000 | ~1000 x 600 |
+| [YOLO (customized)](http://pjreddie.com/darknet/yolo/) | 63.4 | 45 | 98 | 448 x 448 |
+| SSD300* (VGG16) | 77.2 | 46 | 8732 | 300 x 300 |
+| SSD512* (VGG16) | **79.8** | 19 | 24564 | 512 x 512 |
 
-Please join the [caffe-users group](https://groups.google.com/forum/#!forum/caffe-users) or [gitter chat](https://gitter.im/BVLC/caffe) to ask questions and talk about methods and models.
-Framework development discussions and thorough bug reports are collected on [Issues](https://github.com/BVLC/caffe/issues).
 
-Happy brewing!
+<p align="left">
+<img src="http://www.cs.unc.edu/~wliu/papers/ssd_results.png" alt="SSD results on multiple datasets" width="800px">
+</p>
 
-## License and Citation
+_Note: SSD300* and SSD512* are the latest models. Current code should reproduce these results._
 
-Caffe is released under the [BSD 2-Clause license](https://github.com/BVLC/caffe/blob/master/LICENSE).
-The BVLC reference models are released for unrestricted use.
+### Citing SSD
 
-Please cite Caffe in your publications if it helps your research:
+Please cite SSD in your publications if it helps your research:
 
-    @article{jia2014caffe,
-      Author = {Jia, Yangqing and Shelhamer, Evan and Donahue, Jeff and Karayev, Sergey and Long, Jonathan and Girshick, Ross and Guadarrama, Sergio and Darrell, Trevor},
-      Journal = {arXiv preprint arXiv:1408.5093},
-      Title = {Caffe: Convolutional Architecture for Fast Feature Embedding},
-      Year = {2014}
+    @inproceedings{liu2016ssd,
+      title = {{SSD}: Single Shot MultiBox Detector},
+      author = {Liu, Wei and Anguelov, Dragomir and Erhan, Dumitru and Szegedy, Christian and Reed, Scott and Fu, Cheng-Yang and Berg, Alexander C.},
+      booktitle = {ECCV},
+      year = {2016}
     }
+
+### Contents
+1. [Installation](#installation)
+2. [Preparation](#preparation)
+3. [Train/Eval](#traineval)
+4. [Models](#models)
+
+### Installation
+1. Get the code. We will call the directory that you cloned Caffe into `$CAFFE_ROOT`
+  ```Shell
+  git clone https://github.com/weiliu89/caffe.git
+  cd caffe
+  git checkout ssd
+  ```
+
+2. Build the code. Please follow the [Caffe instructions](http://caffe.berkeleyvision.org/installation.html) to install all necessary packages and build it.
+  ```Shell
+  # Modify Makefile.config according to your Caffe installation.
+  cp Makefile.config.example Makefile.config
+  make -j8
+  # Make sure to add $CAFFE_ROOT/python to your PYTHONPATH.
+  make py
+  make test -j8
+  # (Optional)
+  make runtest -j8
+  ```
+
+### Preparation
+1. Download [fully convolutional reduced (atrous) VGGNet](https://gist.github.com/weiliu89/2ed6e13bfd5b57cf81d6). By default, we assume the model is stored in `$CAFFE_ROOT/models/VGGNet/`
+
+2. Download VOC2007 and VOC2012 dataset. By default, we assume the data is stored in `$HOME/data/`
+  ```Shell
+  # Download the data.
+  cd $HOME/data
+  wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
+  wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
+  wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
+  # Extract the data.
+  tar -xvf VOCtrainval_11-May-2012.tar
+  tar -xvf VOCtrainval_06-Nov-2007.tar
+  tar -xvf VOCtest_06-Nov-2007.tar
+  ```
+
+3. Create the LMDB file.
+  ```Shell
+  cd $CAFFE_ROOT
+  # Create the trainval.txt, test.txt, and test_name_size.txt in data/VOC0712/
+  ./data/VOC0712/create_list.sh
+  # You can modify the parameters in create_data.sh if needed.
+  # It will create lmdb files for trainval and test with encoded original image:
+  #   - $HOME/data/VOCdevkit/VOC0712/lmdb/VOC0712_trainval_lmdb
+  #   - $HOME/data/VOCdevkit/VOC0712/lmdb/VOC0712_test_lmdb
+  # and make soft links at examples/VOC0712/
+  ./data/VOC0712/create_data.sh
+  ```
+
+### Train/Eval
+1. Train your model and evaluate the model on the fly.
+  ```Shell
+  # It will create model definition files and save snapshot models in:
+  #   - $CAFFE_ROOT/models/VGGNet/VOC0712/SSD_300x300/
+  # and job file, log file, and the python script in:
+  #   - $CAFFE_ROOT/jobs/VGGNet/VOC0712/SSD_300x300/
+  # and save temporary evaluation results in:
+  #   - $HOME/data/VOCdevkit/results/VOC2007/SSD_300x300/
+  # It should reach 77.* mAP at 120k iterations.
+  python examples/ssd/ssd_pascal.py
+  ```
+  If you don't have time to train your model, you can download a pre-trained model [here](https://drive.google.com/open?id=0BzKzrI_SkD1_WVVTSmQxU0dVRzA).
+
+2. Evaluate the most recent snapshot.
+  ```Shell
+  # If you would like to test a model you trained, you can do:
+  python examples/ssd/score_ssd_pascal.py
+  ```
+
+3. Test your model using a webcam. Note: press <kbd>esc</kbd> to stop.
+  ```Shell
+  # If you would like to attach a webcam to a model you trained, you can do:
+  python examples/ssd/ssd_pascal_webcam.py
+  ```
+  [Here](https://drive.google.com/file/d/0BzKzrI_SkD1_R09NcjM1eElLcWc/view) is a demo video of running an SSD500 model trained on the [MSCOCO](http://mscoco.org) dataset.
+
+4. Check out [`examples/ssd_detect.ipynb`](https://github.com/weiliu89/caffe/blob/ssd/examples/ssd_detect.ipynb) or [`examples/ssd/ssd_detect.cpp`](https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_detect.cpp) on how to detect objects using an SSD model. Check out [`examples/ssd/plot_detections.py`](https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/plot_detections.py) on how to plot detection results output by ssd_detect.cpp.
+
+5. To train on other datasets, please refer to data/OTHERDATASET for more details. We currently support COCO and ILSVRC2016. We recommend using [`examples/ssd.ipynb`](https://github.com/weiliu89/caffe/blob/ssd/examples/ssd_detect.ipynb) to check whether the new dataset is prepared correctly.
+
+### Models
+We have provided the latest models that are trained on different datasets. To help reproduce the results in [Table 6](https://arxiv.org/pdf/1512.02325v4.pdf), most models contain a pretrained `.caffemodel` file, many `.prototxt` files, and python scripts.
+
+1. PASCAL VOC models:
+   * 07+12: [SSD300*](https://drive.google.com/open?id=0BzKzrI_SkD1_WVVTSmQxU0dVRzA), [SSD512*](https://drive.google.com/open?id=0BzKzrI_SkD1_ZDIxVHBEcUNBb2s)
+   * 07++12: [SSD300*](https://drive.google.com/open?id=0BzKzrI_SkD1_WnR2T1BGVWlCZHM), [SSD512*](https://drive.google.com/open?id=0BzKzrI_SkD1_MjFjNTlnempHNWs)
+   * COCO<sup>[1]</sup>: [SSD300*](https://drive.google.com/open?id=0BzKzrI_SkD1_NDlVeFJDc2tIU1k), [SSD512*](https://drive.google.com/open?id=0BzKzrI_SkD1_TW4wTC14aDdCTDQ)
+   * 07+12+COCO: [SSD300*](https://drive.google.com/open?id=0BzKzrI_SkD1_UFpoU01yLS1SaG8), [SSD512*](https://drive.google.com/open?id=0BzKzrI_SkD1_X3ZXQUUtM0xNeEk)
+   * 07++12+COCO: [SSD300*](https://drive.google.com/open?id=0BzKzrI_SkD1_TkFPTEQ1Z091SUE), [SSD512*](https://drive.google.com/open?id=0BzKzrI_SkD1_NVVNdWdYNEh1WTA)
+
+2. COCO models:
+   * trainval35k: [SSD300*](https://drive.google.com/open?id=0BzKzrI_SkD1_dUY1Ml9GRTFpUWc), [SSD512*](https://drive.google.com/open?id=0BzKzrI_SkD1_dlJpZHJzOXd3MTg)
+
+3. ILSVRC models:
+   * trainval1: [SSD300*](https://drive.google.com/open?id=0BzKzrI_SkD1_a2NKQ2d1d043VXM), [SSD500](https://drive.google.com/open?id=0BzKzrI_SkD1_X2ZCLVgwLTgzaTQ)
+
+<sup>[1]</sup>We use [`examples/convert_model.ipynb`](https://github.com/weiliu89/caffe/blob/ssd/examples/convert_model.ipynb) to extract a VOC model from a pretrained COCO model.
diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake
@@ -4,7 +4,7 @@ endif()
 
 # Known NVIDIA GPU achitectures Caffe can be compiled for.
 # This list will be used for CUDA_ARCH_NAME = All option
-set(Caffe_known_gpu_archs "20 21(20) 30 35 50")
+set(Caffe_known_gpu_archs "20 21(20) 30 35 50 52 61")
 
 ################################################################################################
 # A function for automatic detection of GPUs installed  (if autodetection is enabled)

diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
@@ -2,7 +2,7 @@
 set(Caffe_LINKER_LIBS "")
 
 # ---[ Boost
-find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem)
+find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem regex)
 include_directories(SYSTEM ${Boost_INCLUDE_DIR})
 list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES})
 
@@ -69,7 +69,7 @@ endif()
 
 # ---[ OpenCV
 if(USE_OPENCV)
-  find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs)
+  find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs videoio)
   if(NOT OpenCV_FOUND) # if not OpenCV 3.x, then imgcodecs are not found
     find_package(OpenCV REQUIRED COMPONENTS core highgui imgproc)
   endif()

diff --git a/data/ILSVRC2016/README.md b/data/ILSVRC2016/README.md
@@ -0,0 +1,29 @@
+### Preparation
+#### ILSVRC2016
+We encourage you to register for [ILSVRC2016](http://image-net.org/challenges/LSVRC/2016) and download the DET dataset. By default, we assume the data is stored in `$HOME/data/ILSVRC` and will call it `$ILSVRC_ROOT`.
+
+#### ILSVRC2015
+If you choose to use the ILSVRC2015 DET dataset, here are a few steps to take before running the following scripts:
+
+1. There are a few problematic images. You can download the fixed ones [here](http://www.cs.unc.edu/~wliu/projects/SSD/ILSVRC2015_DET_fix.tar.gz).
+
+2. You should download the [val1/val2 split](http://www.cs.unc.edu/~wliu/projects/SSD/ILSVRC2015_DET_val1_val2.tar.gz), courtesy of [Ross Girshick](http://people.eecs.berkeley.edu/~rbg), and put it in `$ILSVRC_ROOT/ImageSets/DET`.
+
+### Remove an invalid file
+Find the invalid image file `Data/DET/val/ILSVRC2013_val_00004542.JPEG`, and remove it.
+
+### Create the LMDB file.
+After you have downloaded the dataset, we can create the lmdb files.
+
+  ```Shell
+  cd $CAFFE_ROOT
+  # Create the trainval1.txt, val2.txt, val2_name_size.txt, test.txt and test_name_size.txt in data/ILSVRC2016/
+  python data/ILSVRC2016/create_list.py
+  # You can modify the parameters in create_data.sh if needed.
+  # It will create lmdb files for trainval1, val2 and test with encoded original image:
+  #   - $HOME/data/ILSVRC/lmdb/DET/ILSVRC2016_trainval1_lmdb
+  #   - $HOME/data/ILSVRC/lmdb/DET/ILSVRC2016_val2_lmdb
+  #   - $HOME/data/ILSVRC/lmdb/DET/ILSVRC2016_test_lmdb
+  # and make soft links at examples/ILSVRC2016/
+  ./data/ILSVRC2016/create_data.sh
+  ```
diff --git a/data/ILSVRC2016/create_data.sh b/data/ILSVRC2016/create_data.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+# Create the ILSVRC2016 DET databases by running scripts/create_annoset.py once
+# per split. The "test" split is built with anno-type "classification"; the
+# "val2" and "trainval1" splits are built with anno-type "detection".
+# Requires bash: the script locates itself through BASH_SOURCE.
+
+# Resolve the repository root (two levels above this script) and work from
+# there, because the examples/ directory passed below is a relative path.
+cur_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+root_dir="$cur_dir/../.."
+
+cd "$root_dir" || exit 1
+
+# Tunable parameters.
+redo=false
+data_root_dir="$HOME/data/ILSVRC"
+dataset_name="ILSVRC2016"
+mapfile="$root_dir/data/$dataset_name/labelmap_ilsvrc_det.prototxt"
+db="lmdb"
+min_dim=0
+max_dim=0
+width=0
+height=0
+
+# Optional flags, kept in an array so each flag stays a separate argument.
+extra_cmd=(--encode-type=jpg --encoded)
+if $redo
+then
+  extra_cmd+=(--redo)
+fi
+
+# create_db ANNO_TYPE DATASET
+# Runs create_annoset.py on data/$dataset_name/DATASET.txt, passing
+# $data_root_dir/$db/DET/${dataset_name}_DATASET_$db as the database path, and
+# mirrors stdout and stderr into data/$dataset_name/DATASET.log.
+create_db() {
+  local anno_type=$1
+  local dataset=$2
+  python "$root_dir/scripts/create_annoset.py" --anno-type="$anno_type" --label-map-file="$mapfile" --min-dim="$min_dim" --max-dim="$max_dim" --resize-width="$width" --resize-height="$height" --check-label "${extra_cmd[@]}" "$data_root_dir" "$root_dir/data/$dataset_name/$dataset.txt" "$data_root_dir/$db/DET/${dataset_name}_${dataset}_$db" "examples/$dataset_name" 2>&1 | tee "$root_dir/data/$dataset_name/$dataset.log"
+}
+
+for dataset in test
+do
+  create_db classification "$dataset"
+done
+
+for dataset in val2 trainval1
+do
+  create_db detection "$dataset"
+done