From d7da0924486f762d810e78ad55be5206c3e5198e Mon Sep 17 00:00:00 2001 From: Noiredd Date: Wed, 7 Mar 2018 13:57:56 +0100 Subject: [PATCH 01/17] PoolingLayer customizable output shape rounding mode --- include/caffe/layers/pooling_layer.hpp | 1 + src/caffe/layers/pooling_layer.cpp | 21 +++++++++++++++++---- src/caffe/proto/caffe.proto | 6 ++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/include/caffe/layers/pooling_layer.hpp b/include/caffe/layers/pooling_layer.hpp index f4d6803ba8e..38a432832cf 100644 --- a/include/caffe/layers/pooling_layer.hpp +++ b/include/caffe/layers/pooling_layer.hpp @@ -51,6 +51,7 @@ class PoolingLayer : public Layer<Dtype> { int height_, width_; int pooled_height_, pooled_width_; bool global_pooling_; + PoolingParameter_RoundMode round_mode_; Blob<Dtype> rand_idx_; Blob<int> max_idx_; }; diff --git a/src/caffe/layers/pooling_layer.cpp b/src/caffe/layers/pooling_layer.cpp index 90897db0f45..f2a0885771f 100644 --- a/src/caffe/layers/pooling_layer.cpp +++ b/src/caffe/layers/pooling_layer.cpp @@ -35,6 +35,7 @@ void PoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, || (!pool_param.has_stride_h() && !pool_param.has_stride_w())) << "Stride is stride OR stride_h and stride_w are required."; global_pooling_ = pool_param.global_pooling(); + round_mode_ = pool_param.round_mode(); if (global_pooling_) { kernel_h_ = bottom[0]->height(); kernel_w_ = bottom[0]->width(); @@ -87,10 +88,22 @@ void PoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, kernel_h_ = bottom[0]->height(); kernel_w_ = bottom[0]->width(); } - pooled_height_ = static_cast<int>(ceil(static_cast<float>( height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1; - pooled_width_ = static_cast<int>(ceil(static_cast<float>( width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1; + switch (round_mode_) { + case PoolingParameter_RoundMode_CEIL: + pooled_height_ = static_cast<int>(ceil(static_cast<float>( height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1; + pooled_width_ = static_cast<int>(ceil(static_cast<float>( width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1; + break; + case PoolingParameter_RoundMode_FLOOR: + pooled_height_ = static_cast<int>(floor(static_cast<float>( height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1; + pooled_width_ = static_cast<int>(floor(static_cast<float>( width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1; + break; + default: + LOG(FATAL) << "Unknown rounding mode."; + } if (pad_h_ || pad_w_) { // If we have padding, ensure that the last pooling starts strictly // inside the image (instead of at the padding); otherwise clip the last. diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 22764abc33f..cfef3c00262 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -935,6 +935,12 @@ message PoolingParameter { // If global_pooling then it will pool over the size of the bottom by doing // kernel_h = bottom->height and kernel_w = bottom->width optional bool global_pooling = 12 [default = false]; + // How to calculate the output size - using ceil (default) or floor rounding.
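+ // For each spatial dimension the output size is computed as
+ //   round((input_size + 2 * pad - kernel_size) / stride) + 1,
+ // where round() is ceil() in CEIL mode and floor() in FLOOR mode (see Reshape above).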
+ enum RoundMode { + CEIL = 0; + FLOOR = 1; + } + optional RoundMode round_mode = 13 [default = CEIL]; } message PowerParameter { From 3318a466309a82d3d63f3b33f3663824da3f1ceb Mon Sep 17 00:00:00 2001 From: Jerry Zhao Date: Mon, 16 Apr 2018 02:26:08 -0700 Subject: [PATCH 02/17] Cherry-picked USE_HDF5 from Android branch --- CMakeLists.txt | 3 +++ Makefile | 12 +++++++++++- Makefile.config.example | 2 ++ cmake/ConfigGen.cmake | 12 ++++++++++++ cmake/Dependencies.cmake | 8 ++++++++ cmake/Summary.cmake | 2 ++ include/caffe/util/hdf5.hpp | 2 ++ src/caffe/layers/hdf5_data_layer.cpp | 2 ++ src/caffe/layers/hdf5_data_layer.cu | 2 ++ src/caffe/layers/hdf5_output_layer.cpp | 2 ++ src/caffe/layers/hdf5_output_layer.cu | 2 ++ src/caffe/net.cpp | 13 +++++++++++++ src/caffe/solvers/sgd_solver.cpp | 12 ++++++++++++ src/caffe/test/test_hdf5_output_layer.cpp | 2 ++ src/caffe/test/test_hdf5data_layer.cpp | 2 ++ src/caffe/util/hdf5.cpp | 2 ++ 16 files changed, 79 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 08f56a33a59..27d172f900b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,6 +42,9 @@ caffe_option(USE_LMDB "Build with lmdb" ON) caffe_option(ALLOW_LMDB_NOLOCK "Allow MDB_NOLOCK when reading LMDB files (only if necessary)" OFF) caffe_option(USE_OPENMP "Link with OpenMP (when your BLAS wants OpenMP and you get linker errors)" OFF) +# This code is taken from https://github.com/sh1r0/caffe-android-lib +caffe_option(USE_HDF5 "Build with hdf5" ON) + # ---[ Dependencies include(cmake/Dependencies.cmake) diff --git a/Makefile b/Makefile index c85c695acff..29ea8a69a61 100644 --- a/Makefile +++ b/Makefile @@ -178,11 +178,13 @@ ifneq ($(CPU_ONLY), 1) LIBRARIES := cudart cublas curand endif -LIBRARIES += glog gflags protobuf boost_system boost_filesystem m hdf5_hl hdf5 +LIBRARIES += glog gflags protobuf boost_system boost_filesystem m # handle IO dependencies USE_LEVELDB ?= 1 USE_LMDB ?= 1 +# This code is taken from https://github.com/sh1r0/caffe-android-lib +USE_HDF5 ?= 1 USE_OPENCV ?= 1 ifeq ($(USE_LEVELDB), 1) @@ -191,6 +193,10 @@ endif ifeq ($(USE_LMDB), 1) LIBRARIES += lmdb endif +# This code is taken from https://github.com/sh1r0/caffe-android-lib +ifeq ($(USE_HDF5), 1) + LIBRARIES += hdf5_hl hdf5 +endif ifeq ($(USE_OPENCV), 1) LIBRARIES += opencv_core opencv_highgui opencv_imgproc @@ -347,6 +353,10 @@ ifeq ($(ALLOW_LMDB_NOLOCK), 1) COMMON_FLAGS += -DALLOW_LMDB_NOLOCK endif endif +# This code is taken from https://github.com/sh1r0/caffe-android-lib +ifeq ($(USE_HDF5), 1) + COMMON_FLAGS += -DUSE_HDF5 +endif # CPU-only configuration ifeq ($(CPU_ONLY), 1) diff --git a/Makefile.config.example b/Makefile.config.example index 79905935f15..24ca632783a 100644 --- a/Makefile.config.example +++ b/Makefile.config.example @@ -11,6 +11,8 @@ # USE_OPENCV := 0 # USE_LEVELDB := 0 # USE_LMDB := 0 +# This code is taken from https://github.com/sh1r0/caffe-android-lib +# USE_HDF5 := 0 # uncomment to allow MDB_NOLOCK when reading LMDB files (only if necessary) # You should not set this flag if you will be reading LMDBs with any diff --git a/cmake/ConfigGen.cmake b/cmake/ConfigGen.cmake index 09bb09b4ff2..69889c243b2 100644 --- a/cmake/ConfigGen.cmake +++ b/cmake/ConfigGen.cmake @@ -24,6 +24,18 @@ function(caffe_generate_export_configs) set(HAVE_CUDA FALSE) endif() + set(HDF5_IMPORTED OFF) + foreach(_lib ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES}) + if(TARGET ${_lib}) + set(HDF5_IMPORTED ON) + endif() + endforeach() + + # This code is taken from https://github.com/sh1r0/caffe-android-lib + 
if(USE_HDF5) + list(APPEND Caffe_DEFINITIONS -DUSE_HDF5) + endif() + if(NOT HAVE_CUDNN) set(HAVE_CUDNN FALSE) endif() diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index c48255c89f2..ca2e3ad9e5e 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -47,6 +47,14 @@ find_package(HDF5 COMPONENTS HL REQUIRED) list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${HDF5_INCLUDE_DIRS}) list(APPEND Caffe_LINKER_LIBS PUBLIC ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES}) +# This code is taken from https://github.com/sh1r0/caffe-android-lib +if(USE_HDF5) + find_package(HDF5 COMPONENTS HL REQUIRED) + include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR}) + list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES}) + add_definitions(-DUSE_HDF5) +endif() + # ---[ LMDB if(USE_LMDB) find_package(LMDB REQUIRED) diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index ed8c25268db..40b8c2f2966 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -119,6 +119,8 @@ function(caffe_print_configuration_summary) caffe_status(" USE_LMDB : ${USE_LMDB}") caffe_status(" USE_NCCL : ${USE_NCCL}") caffe_status(" ALLOW_LMDB_NOLOCK : ${ALLOW_LMDB_NOLOCK}") + # This code is taken from https://github.com/sh1r0/caffe-android-lib + caffe_status(" USE_HDF5 : ${USE_HDF5}") caffe_status("") caffe_status("Dependencies:") caffe_status(" BLAS : " APPLE THEN "Yes (vecLib)" ELSE "Yes (${BLAS})") diff --git a/include/caffe/util/hdf5.hpp b/include/caffe/util/hdf5.hpp index 71549c1cc02..dbd8bb6c5e4 100644 --- a/include/caffe/util/hdf5.hpp +++ b/include/caffe/util/hdf5.hpp @@ -1,3 +1,4 @@ +#ifdef USE_HDF5 #ifndef CAFFE_UTIL_HDF5_H_ #define CAFFE_UTIL_HDF5_H_ @@ -37,3 +38,4 @@ string hdf5_get_name_by_idx(hid_t loc_id, int idx); } // namespace caffe #endif // CAFFE_UTIL_HDF5_H_ +#endif // USE_HDF5 diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp index 00716a92b15..7668854cc1f 100644 --- a/src/caffe/layers/hdf5_data_layer.cpp +++ b/src/caffe/layers/hdf5_data_layer.cpp @@ -1,3 +1,4 @@ +#ifdef USE_HDF5 /* TODO: - load file in a separate thread ("prefetch") @@ -184,3 +185,4 @@ INSTANTIATE_CLASS(HDF5DataLayer); REGISTER_LAYER_CLASS(HDF5Data); } // namespace caffe +#endif // USE_HDF5 diff --git a/src/caffe/layers/hdf5_data_layer.cu b/src/caffe/layers/hdf5_data_layer.cu index 33eebd41dfc..70cd9f32f85 100644 --- a/src/caffe/layers/hdf5_data_layer.cu +++ b/src/caffe/layers/hdf5_data_layer.cu @@ -1,3 +1,4 @@ +#ifdef USE_HDF5 /* TODO: - only load parts of the file, in accordance with a prototxt param "max_mem" @@ -34,3 +35,4 @@ void HDF5DataLayer::Forward_gpu(const vector*>& bottom, INSTANTIATE_LAYER_GPU_FUNCS(HDF5DataLayer); } // namespace caffe +#endif // USE_HDF5 diff --git a/src/caffe/layers/hdf5_output_layer.cpp b/src/caffe/layers/hdf5_output_layer.cpp index f8f1edcd18e..28c453a20fd 100644 --- a/src/caffe/layers/hdf5_output_layer.cpp +++ b/src/caffe/layers/hdf5_output_layer.cpp @@ -1,3 +1,4 @@ +#ifdef USE_HDF5 #include #include "hdf5.h" @@ -72,3 +73,4 @@ INSTANTIATE_CLASS(HDF5OutputLayer); REGISTER_LAYER_CLASS(HDF5Output); } // namespace caffe +#endif // USE_HDF5 diff --git a/src/caffe/layers/hdf5_output_layer.cu b/src/caffe/layers/hdf5_output_layer.cu index c1685cd34a7..891aea03862 100644 --- a/src/caffe/layers/hdf5_output_layer.cu +++ b/src/caffe/layers/hdf5_output_layer.cu @@ -1,3 +1,4 @@ +#ifdef USE_HDF5 #include #include "hdf5.h" @@ -37,3 +38,4 @@ void HDF5OutputLayer::Backward_gpu(const vector*>& top, INSTANTIATE_LAYER_GPU_FUNCS(HDF5OutputLayer); 
} // namespace caffe +#endif // USE_HDF5 diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 353c2f95b9e..73adcc6dba1 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -5,7 +5,9 @@ #include #include +#ifdef USE_HDF5 #include "hdf5.h" +#endif // USE_HDF5 #include "caffe/common.hpp" #include "caffe/layer.hpp" @@ -786,6 +788,7 @@ void Net::CopyTrainedLayersFromBinaryProto( template void Net::CopyTrainedLayersFromHDF5(const string trained_filename) { +#ifdef USE_HDF5 hid_t file_hid = H5Fopen(trained_filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); CHECK_GE(file_hid, 0) << "Couldn't open " << trained_filename; @@ -832,6 +835,10 @@ void Net::CopyTrainedLayersFromHDF5(const string trained_filename) { } H5Gclose(data_hid); H5Fclose(file_hid); +#else + LOG(FATAL) << "CopyTrainedLayersFromHDF5 requires hdf5;" + << " compile with USE_HDF5."; +#endif // USE_HDF5 } template @@ -848,6 +855,8 @@ void Net::ToProto(NetParameter* param, bool write_diff) const { template void Net::ToHDF5(const string& filename, bool write_diff) const { +// This code is taken from https://github.com/sh1r0/caffe-android-lib +#ifdef USE_HDF5 hid_t file_hid = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); CHECK_GE(file_hid, 0) @@ -901,6 +910,10 @@ void Net::ToHDF5(const string& filename, bool write_diff) const { H5Gclose(diff_hid); } H5Fclose(file_hid); +// This code is taken from https://github.com/sh1r0/caffe-android-lib +#else + LOG(FATAL) << "ToHDF5 requires hdf5; compile with USE_HDF5."; +#endif // USE_HDF5 } template diff --git a/src/caffe/solvers/sgd_solver.cpp b/src/caffe/solvers/sgd_solver.cpp index 1d52beb0636..b11a8f41f12 100644 --- a/src/caffe/solvers/sgd_solver.cpp +++ b/src/caffe/solvers/sgd_solver.cpp @@ -285,6 +285,8 @@ void SGDSolver::SnapshotSolverStateToBinaryProto( template void SGDSolver::SnapshotSolverStateToHDF5( const string& model_filename) { +// This code is taken from https://github.com/sh1r0/caffe-android-lib +#ifdef USE_HDF5 string snapshot_filename = Solver::SnapshotFilename(".solverstate.h5"); LOG(INFO) << "Snapshotting solver state to HDF5 file " << snapshot_filename; @@ -306,6 +308,11 @@ void SGDSolver::SnapshotSolverStateToHDF5( } H5Gclose(history_hid); H5Fclose(file_hid); +// This code is taken from https://github.com/sh1r0/caffe-android-lib +#else + LOG(FATAL) << "SnapshotSolverStateToHDF5 requires hdf5;" + << " compile with USE_HDF5."; +#endif // USE_HDF5 } template @@ -330,6 +337,7 @@ void SGDSolver::RestoreSolverStateFromBinaryProto( template void SGDSolver::RestoreSolverStateFromHDF5(const string& state_file) { +#ifdef USE_HDF5 hid_t file_hid = H5Fopen(state_file.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); CHECK_GE(file_hid, 0) << "Couldn't open solver state file " << state_file; this->iter_ = hdf5_load_int(file_hid, "iter"); @@ -351,6 +359,10 @@ void SGDSolver::RestoreSolverStateFromHDF5(const string& state_file) { } H5Gclose(history_hid); H5Fclose(file_hid); +#else + LOG(FATAL) << "RestoreSolverStateFromHDF5 requires hdf5;" + << " compile with USE_HDF5."; +#endif // USE_HDF5 } INSTANTIATE_CLASS(SGDSolver); diff --git a/src/caffe/test/test_hdf5_output_layer.cpp b/src/caffe/test/test_hdf5_output_layer.cpp index f94dd57e7de..11d52310cad 100644 --- a/src/caffe/test/test_hdf5_output_layer.cpp +++ b/src/caffe/test/test_hdf5_output_layer.cpp @@ -1,3 +1,4 @@ +#ifdef USE_HDF5 #include #include @@ -120,3 +121,4 @@ TYPED_TEST(HDF5OutputLayerTest, TestForward) { } } // namespace caffe +#endif // USE_HDF5 diff --git a/src/caffe/test/test_hdf5data_layer.cpp 
b/src/caffe/test/test_hdf5data_layer.cpp index 3977c4866c7..0e5c398f966 100644 --- a/src/caffe/test/test_hdf5data_layer.cpp +++ b/src/caffe/test/test_hdf5data_layer.cpp @@ -1,3 +1,4 @@ +#ifdef USE_HDF5 #include <string> #include <vector> @@ -163,3 +164,4 @@ TYPED_TEST(HDF5DataLayerTest, TestSkip) { } } // namespace caffe +#endif // USE_HDF5 diff --git a/src/caffe/util/hdf5.cpp b/src/caffe/util/hdf5.cpp index ed73742937f..cefd853dff4 100644 --- a/src/caffe/util/hdf5.cpp +++ b/src/caffe/util/hdf5.cpp @@ -1,3 +1,4 @@ +#ifdef USE_HDF5 #include "caffe/util/hdf5.hpp" #include <string> @@ -207,3 +208,4 @@ string hdf5_get_name_by_idx(hid_t loc_id, int idx) { } } // namespace caffe +#endif // USE_HDF5 From 0536720f41ff6fd43b98a0eb9eb7cd8f0ece5d1e Mon Sep 17 00:00:00 2001 From: Kuang Fangjun Date: Tue, 8 May 2018 10:36:44 +0800 Subject: [PATCH 03/17] fix issue #6387. --- include/caffe/syncedmem.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/caffe/syncedmem.hpp b/include/caffe/syncedmem.hpp index 317ce29a257..8d650a34a8e 100644 --- a/include/caffe/syncedmem.hpp +++ b/include/caffe/syncedmem.hpp @@ -66,8 +66,8 @@ class SyncedMemory { void* mutable_cpu_data(); void* mutable_gpu_data(); enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED }; - SyncedHead head() { return head_; } - size_t size() { return size_; } + SyncedHead head() const { return head_; } + size_t size() const { return size_; } #ifndef CPU_ONLY void async_gpu_push(const cudaStream_t& stream); From 6d912a32bedd4daf6e0f0c3a2622cf4d382ed759 Mon Sep 17 00:00:00 2001 From: Kuang Fangjun Date: Tue, 8 May 2018 10:46:54 +0800 Subject: [PATCH 04/17] fix issue #6389 --- src/caffe/test/test_syncedmem.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/caffe/test/test_syncedmem.cpp b/src/caffe/test/test_syncedmem.cpp index 16dfb58230f..2ca9ca2f998 100644 --- a/src/caffe/test/test_syncedmem.cpp +++ b/src/caffe/test/test_syncedmem.cpp @@ -80,7 +80,7 @@ TEST_F(SyncedMemoryTest, TestGPURead) { char* recovered_value = new char[10]; caffe_gpu_memcpy(10, gpu_data, recovered_value); for (int i = 0; i < mem.size(); ++i) { - EXPECT_EQ((static_cast<char*>(recovered_value))[i], 1); + EXPECT_EQ(recovered_value[i], 1); } // do another round cpu_data = mem.mutable_cpu_data(); @@ -94,7 +94,7 @@ TEST_F(SyncedMemoryTest, TestGPURead) { // check if values are the same caffe_gpu_memcpy(10, gpu_data, recovered_value); for (int i = 0; i < mem.size(); ++i) { - EXPECT_EQ((static_cast<char*>(recovered_value))[i], 2); + EXPECT_EQ(recovered_value[i], 2); } delete[] recovered_value; } From cc1c8fb465fbf48e3048659ca5aa407561df7687 Mon Sep 17 00:00:00 2001 From: Mitar Date: Sat, 28 Oct 2017 02:35:17 -0700 Subject: [PATCH 05/17] [pycaffe] expose solver update to do manual solving a sketch of `solver.step()` done out manually: 1. `solver.net.forward()` 2. `solver.net.backward()` 3. `solver.apply_update()` 4.
`solver.net.clear_param_diffs()` --- include/caffe/sgd_solvers.hpp | 3 ++- include/caffe/solver.hpp | 3 ++- python/caffe/_caffe.cpp | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/caffe/sgd_solvers.hpp b/include/caffe/sgd_solvers.hpp index 1fc52d87137..f1819bb62dc 100644 --- a/include/caffe/sgd_solvers.hpp +++ b/include/caffe/sgd_solvers.hpp @@ -23,10 +23,11 @@ class SGDSolver : public Solver<Dtype> { const vector<shared_ptr<Blob<Dtype> > >& history() { return history_; } + virtual void ApplyUpdate(); + protected: void PreSolve(); Dtype GetLearningRate(); - virtual void ApplyUpdate(); virtual void Normalize(int param_id); virtual void Regularize(int param_id); virtual void ComputeUpdateValue(int param_id, Dtype rate); diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index a28d8cb897e..75560f9fd08 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -94,9 +94,10 @@ class Solver { */ virtual inline const char* type() const { return ""; } - protected: // Make and apply the update value for the current iteration. virtual void ApplyUpdate() = 0; + + protected: string SnapshotFilename(const string extension); string SnapshotToBinaryProto(); string SnapshotToHDF5(); diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 72659a4f44e..eed16c29c84 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -507,6 +507,7 @@ BOOST_PYTHON_MODULE(_caffe) { .def("restore", &Solver<Dtype>::Restore) .def("snapshot", &Solver<Dtype>::Snapshot) .def("share_weights", &share_weights) + .def("apply_update", &Solver<Dtype>::ApplyUpdate) .add_property("param", bp::make_function(&Solver<Dtype>::param, bp::return_value_policy<bp::copy_const_reference>())); BP_REGISTER_SHARED_PTR_TO_PYTHON(Solver); From c74913d4b5b6c120e40bfeaab43fde45acc22c24 Mon Sep 17 00:00:00 2001 From: Mitar Date: Sat, 28 Oct 2017 02:52:35 -0700 Subject: [PATCH 06/17] increment iteration during update, not step with update exposed it is important to increment the iteration when an update is made, whether by step or update alone. more fundamentally, it's the update that defines an iteration, so this is a natural place for the increment. --- src/caffe/solver.cpp | 4 ---- src/caffe/solvers/sgd_solver.cpp | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index d229acff485..bf27beeed41 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -266,10 +266,6 @@ void Solver<Dtype>::Step(int iters) { } ApplyUpdate(); - // Increment the internal iter_ counter -- its value should always indicate - // the number of times the weights have been updated. - ++iter_; - SolverAction::Enum request = GetRequestedAction(); // Save a snapshot if needed. diff --git a/src/caffe/solvers/sgd_solver.cpp b/src/caffe/solvers/sgd_solver.cpp index 1d52beb0636..a56a2d0bf51 100644 --- a/src/caffe/solvers/sgd_solver.cpp +++ b/src/caffe/solvers/sgd_solver.cpp @@ -120,6 +120,10 @@ void SGDSolver<Dtype>::ApplyUpdate() { ComputeUpdateValue(param_id, rate); } this->net_->Update(); + + // Increment the internal iter_ counter -- its value should always indicate + // the number of times the weights have been updated. + ++this->iter_; } template <typename Dtype> From cfcf74fecf2e2b9d35974c5b0a0554921ad66984 Mon Sep 17 00:00:00 2001 From: Mitar Date: Sun, 10 Dec 2017 03:11:26 -0800 Subject: [PATCH 07/17] [pycaffe] expose mutable solver parameter, base lr, and effective lr `solver.lr` is the effective learning rate in use while `solver.base_lr` is the configured learning rate at initialization.
the solver parameter is now editable for setting fields that are in use throughout the lifetime of the solver, such as the maximum iteration. --- include/caffe/sgd_solvers.hpp | 2 +- python/caffe/_caffe.cpp | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/include/caffe/sgd_solvers.hpp b/include/caffe/sgd_solvers.hpp index f1819bb62dc..925ff78331e 100644 --- a/include/caffe/sgd_solvers.hpp +++ b/include/caffe/sgd_solvers.hpp @@ -24,10 +24,10 @@ class SGDSolver : public Solver<Dtype> { const vector<shared_ptr<Blob<Dtype> > >& history() { return history_; } virtual void ApplyUpdate(); + Dtype GetLearningRate(); protected: void PreSolve(); - Dtype GetLearningRate(); virtual void Normalize(int param_id); virtual void Regularize(int param_id); virtual void ComputeUpdateValue(int param_id, Dtype rate); diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index eed16c29c84..9e7f61402c4 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -490,7 +490,9 @@ BOOST_PYTHON_MODULE(_caffe) { bp::class_<SolverParameter>("SolverParameter", bp::no_init) .add_property("max_iter", &SolverParameter::max_iter) .add_property("display", &SolverParameter::display) - .add_property("layer_wise_reduce", &SolverParameter::layer_wise_reduce); + .add_property("layer_wise_reduce", &SolverParameter::layer_wise_reduce) + .add_property("base_lr", &SolverParameter::base_lr, + &SolverParameter::set_base_lr); bp::class_<LayerParameter>("LayerParameter", bp::no_init); bp::class_<Solver<Dtype>, shared_ptr<Solver<Dtype> >, boost::noncopyable>( @@ -509,25 +511,26 @@ BOOST_PYTHON_MODULE(_caffe) { .def("share_weights", &share_weights) .def("apply_update", &Solver<Dtype>::ApplyUpdate) .add_property("param", bp::make_function(&Solver<Dtype>::param, - bp::return_value_policy<bp::copy_const_reference>())); + bp::return_internal_reference<>())); BP_REGISTER_SHARED_PTR_TO_PYTHON(Solver); bp::class_<SGDSolver<Dtype>, bp::bases<Solver<Dtype> >, shared_ptr<SGDSolver<Dtype> >, boost::noncopyable>( - "SGDSolver", bp::init<string>()); - bp::class_<NesterovSolver<Dtype>, bp::bases<Solver<Dtype> >, + "SGDSolver", bp::init<string>()) + .add_property("lr", &SGDSolver<Dtype>::GetLearningRate); + bp::class_<NesterovSolver<Dtype>, bp::bases<SGDSolver<Dtype> >, shared_ptr<NesterovSolver<Dtype> >, boost::noncopyable>( "NesterovSolver", bp::init<string>()); - bp::class_<AdaGradSolver<Dtype>, bp::bases<Solver<Dtype> >, + bp::class_<AdaGradSolver<Dtype>, bp::bases<SGDSolver<Dtype> >, shared_ptr<AdaGradSolver<Dtype> >, boost::noncopyable>( "AdaGradSolver", bp::init<string>()); - bp::class_<RMSPropSolver<Dtype>, bp::bases<Solver<Dtype> >, + bp::class_<RMSPropSolver<Dtype>, bp::bases<SGDSolver<Dtype> >, shared_ptr<RMSPropSolver<Dtype> >, boost::noncopyable>( "RMSPropSolver", bp::init<string>()); - bp::class_<AdaDeltaSolver<Dtype>, bp::bases<Solver<Dtype> >, + bp::class_<AdaDeltaSolver<Dtype>, bp::bases<SGDSolver<Dtype> >, shared_ptr<AdaDeltaSolver<Dtype> >, boost::noncopyable>( "AdaDeltaSolver", bp::init<string>()); - bp::class_<AdamSolver<Dtype>, bp::bases<Solver<Dtype> >, + bp::class_<AdamSolver<Dtype>, bp::bases<SGDSolver<Dtype> >, shared_ptr<AdamSolver<Dtype> >, boost::noncopyable>( "AdamSolver", bp::init<string>()); From 1bdcb74ea0d0acc7beb729ad7f01b0e5e44528a5 Mon Sep 17 00:00:00 2001 From: Valentin Tolmer Date: Tue, 21 Jun 2016 17:12:57 -0700 Subject: [PATCH 08/17] [pycaffe] test solver update --- python/caffe/test/test_solver.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/python/caffe/test/test_solver.py b/python/caffe/test/test_solver.py index f618fded8cd..50c9d5412d7 100644 --- a/python/caffe/test/test_solver.py +++ b/python/caffe/test/test_solver.py @@ -38,6 +38,17 @@ def test_solve(self): self.solver.solve() self.assertEqual(self.solver.iter, 100) + def test_apply_update(self): + net = self.solver.net + data = net.layers[1].blobs[0].data[...] + # Reset the weights of that layer to 0 + data[...] = 0 + net.layers[1].blobs[0].diff[...]
= 1 + # Apply the update, the initial learning rate should be 0.01 + self.solver.apply_update() + # Check that the new weights are -0.01, with a precision of 1e-7 + self.assertTrue((data - -0.01 * np.ones(data.shape)).max() < 1e-7) + def test_net_memory(self): """Check that nets survive after the solver is destroyed.""" From 72e953ba151642850ef8ac4c4e7bf4181660be51 Mon Sep 17 00:00:00 2001 From: Yuda Liu Date: Sun, 8 Jul 2018 15:46:43 +0800 Subject: [PATCH 09/17] Update inner_product_layer.cpp --- src/caffe/layers/inner_product_layer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/layers/inner_product_layer.cpp b/src/caffe/layers/inner_product_layer.cpp index e65349f0055..57fdbe1fac2 100644 --- a/src/caffe/layers/inner_product_layer.cpp +++ b/src/caffe/layers/inner_product_layer.cpp @@ -42,7 +42,7 @@ void InnerProductLayer::LayerSetUp(const vector*>& bottom, shared_ptr > weight_filler(GetFiller( this->layer_param_.inner_product_param().weight_filler())); weight_filler->Fill(this->blobs_[0].get()); - // If necessary, intiialize and fill the bias term + // If necessary, initialize and fill the bias term if (bias_term_) { vector bias_shape(1, N_); this->blobs_[1].reset(new Blob(bias_shape)); From f019d0dfe86f49d1140961f8c7dec22130c83154 Mon Sep 17 00:00:00 2001 From: Kuang Fangjun Date: Thu, 12 Jul 2018 16:36:06 +0800 Subject: [PATCH 10/17] fix typos and some minor fixes. --- cmake/Modules/FindMKL.cmake | 2 +- include/caffe/net.hpp | 6 +++--- include/caffe/solver.hpp | 4 ++-- include/caffe/util/signal_handler.h | 2 +- python/caffe/_caffe.cpp | 2 +- src/caffe/layers/pooling_layer.cpp | 2 +- src/caffe/net.cpp | 8 ++++---- src/caffe/proto/caffe.proto | 6 +++--- src/caffe/solver.cpp | 6 +++--- src/caffe/util/signal_handler.cpp | 2 +- 10 files changed, 20 insertions(+), 20 deletions(-) diff --git a/cmake/Modules/FindMKL.cmake b/cmake/Modules/FindMKL.cmake index 5ab93b2d6b6..ef0c3bf1c64 100644 --- a/cmake/Modules/FindMKL.cmake +++ b/cmake/Modules/FindMKL.cmake @@ -9,7 +9,7 @@ # This module defines the following variables: # # MKL_FOUND : True mkl is found -# MKL_INCLUDE_DIR : unclude directory +# MKL_INCLUDE_DIR : include directory # MKL_LIBRARIES : the libraries to link against. diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp index d3c9306e9cf..143d5d28883 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -111,9 +111,9 @@ class Net { * another Net. */ void CopyTrainedLayersFrom(const NetParameter& param); - void CopyTrainedLayersFrom(const string trained_filename); - void CopyTrainedLayersFromBinaryProto(const string trained_filename); - void CopyTrainedLayersFromHDF5(const string trained_filename); + void CopyTrainedLayersFrom(const string& trained_filename); + void CopyTrainedLayersFromBinaryProto(const string& trained_filename); + void CopyTrainedLayersFromHDF5(const string& trained_filename); /// @brief Writes the net to a proto. void ToProto(NetParameter* param, bool write_diff = false) const; /// @brief Writes the net to an HDF5 file. diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index 75560f9fd08..7a0d7777f2d 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -55,7 +55,7 @@ class Solver { // The main entry of the solver function. In default, iter will be zero. Pass // in a non-zero iter number to resume training for a pre-trained net. 
virtual void Solve(const char* resume_file = NULL); - inline void Solve(const string resume_file) { Solve(resume_file.c_str()); } + inline void Solve(const string& resume_file) { Solve(resume_file.c_str()); } void Step(int iters); // The Restore method simply dispatches to one of the // RestoreSolverStateFrom___ protected methods. You should implement these @@ -98,7 +98,7 @@ class Solver { virtual void ApplyUpdate() = 0; protected: - string SnapshotFilename(const string extension); + string SnapshotFilename(const string& extension); string SnapshotToBinaryProto(); string SnapshotToHDF5(); // The test routine diff --git a/include/caffe/util/signal_handler.h b/include/caffe/util/signal_handler.h index fb84c65bd2e..5246332581e 100644 --- a/include/caffe/util/signal_handler.h +++ b/include/caffe/util/signal_handler.h @@ -8,7 +8,7 @@ namespace caffe { class SignalHandler { public: - // Contructor. Specify what action to take when a signal is received. + // Constructor. Specify what action to take when a signal is received. SignalHandler(SolverAction::Enum SIGINT_action, SolverAction::Enum SIGHUP_action); ~SignalHandler(); diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 9e7f61402c4..82bf21e6e16 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -416,7 +416,7 @@ BOOST_PYTHON_MODULE(_caffe) { .def("reshape", &Net::Reshape) .def("clear_param_diffs", &Net::ClearParamDiffs) // The cast is to select a particular overload. - .def("copy_from", static_cast::*)(const string)>( + .def("copy_from", static_cast::*)(const string&)>( &Net::CopyTrainedLayersFrom)) .def("share_with", &Net::ShareTrainedLayersWith) .add_property("_blob_loss_weights", bp::make_function( diff --git a/src/caffe/layers/pooling_layer.cpp b/src/caffe/layers/pooling_layer.cpp index 90897db0f45..1fa78904ea8 100644 --- a/src/caffe/layers/pooling_layer.cpp +++ b/src/caffe/layers/pooling_layer.cpp @@ -132,7 +132,7 @@ void PoolingLayer::Forward_cpu(const vector*>& bottom, const int top_count = top[0]->count(); // We'll output the mask to top[1] if it's of size >1. const bool use_top_mask = top.size() > 1; - int* mask = NULL; // suppress warnings about uninitalized variables + int* mask = NULL; // suppress warnings about uninitialized variables Dtype* top_mask = NULL; // Different pooling methods. We explicitly do the switch outside the for // loop to save time, although this results in more code. diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 353c2f95b9e..94c0220f172 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -164,7 +164,7 @@ void Net::Init(const NetParameter& in_param) { // loss. We can skip backward computation for blobs that don't contribute // to the loss. 
// Also checks if all bottom blobs don't need backward computation (possible - // because the skip_propagate_down param) and so we can skip bacward + // because the skip_propagate_down param) and so we can skip backward // computation for the entire layer set blobs_under_loss; set blobs_skip_backp; @@ -768,7 +768,7 @@ void Net::CopyTrainedLayersFrom(const NetParameter& param) { } template -void Net::CopyTrainedLayersFrom(const string trained_filename) { +void Net::CopyTrainedLayersFrom(const string& trained_filename) { if (H5Fis_hdf5(trained_filename.c_str())) { CopyTrainedLayersFromHDF5(trained_filename); } else { @@ -778,14 +778,14 @@ void Net::CopyTrainedLayersFrom(const string trained_filename) { template void Net::CopyTrainedLayersFromBinaryProto( - const string trained_filename) { + const string& trained_filename) { NetParameter param; ReadNetParamsFromBinaryFileOrDie(trained_filename, ¶m); CopyTrainedLayersFrom(param); } template -void Net::CopyTrainedLayersFromHDF5(const string trained_filename) { +void Net::CopyTrainedLayersFromHDF5(const string& trained_filename) { hid_t file_hid = H5Fopen(trained_filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); CHECK_GE(file_hid, 0) << "Couldn't open " << trained_filename; diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index b9bb3f4dffe..2f8dffc0e1b 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -187,7 +187,7 @@ message SolverParameter { optional int32 snapshot = 14 [default = 0]; // The snapshot interval // The prefix for the snapshot. - // If not set then is replaced by prototxt file path without extention. + // If not set then is replaced by prototxt file path without extension. // If is set to directory then is augmented by prototxt file name // without extention. optional string snapshot_prefix = 15; @@ -248,8 +248,8 @@ message SolverParameter { // Path to caffemodel file(s) with pretrained weights to initialize finetuning. // Tha same as command line --weights parameter for caffe train command. - // If command line --weights parameter if specified, it has higher priority - // and owerwrites this one(s). + // If command line --weights parameter is specified, it has higher priority + // and overwrites this one(s). // If --snapshot command line parameter is specified, this one(s) are ignored. // If several model files are expected, they can be listed in a one // weights parameter separated by ',' (like in a command string) or diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index bf27beeed41..842312e0b76 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -78,7 +78,7 @@ template void Solver::InitTrainNet() { const int num_train_nets = param_.has_net() + param_.has_net_param() + param_.has_train_net() + param_.has_train_net_param(); - const string& field_names = "net, net_param, train_net, train_net_param"; + const string field_names = "net, net_param, train_net, train_net_param"; CHECK_GE(num_train_nets, 1) << "SolverParameter must specify a train net " << "using one of these fields: " << field_names; CHECK_LE(num_train_nets, 1) << "SolverParameter must not contain more than " @@ -447,13 +447,13 @@ void Solver::CheckSnapshotWritePermissions() { } else { LOG(FATAL) << "Cannot write to snapshot prefix '" << param_.snapshot_prefix() << "'. 
Make sure " - << "that the directory exists and is writeable."; + << "that the directory exists and is writable."; } } } template -string Solver::SnapshotFilename(const string extension) { +string Solver::SnapshotFilename(const string& extension) { return param_.snapshot_prefix() + "_iter_" + caffe::format_int(iter_) + extension; } diff --git a/src/caffe/util/signal_handler.cpp b/src/caffe/util/signal_handler.cpp index 5d764ec524f..9658fb390ea 100644 --- a/src/caffe/util/signal_handler.cpp +++ b/src/caffe/util/signal_handler.cpp @@ -48,7 +48,7 @@ namespace { void UnhookHandler() { if (already_hooked_up) { struct sigaction sa; - // Setup the sighub handler + // Setup the sighup handler sa.sa_handler = SIG_DFL; // Restart the system call, if at all possible sa.sa_flags = SA_RESTART; From 43536289bd770f7bd29ce407361d78601b9ff2f0 Mon Sep 17 00:00:00 2001 From: Pavel Grunt Date: Tue, 14 Aug 2018 15:22:08 +0200 Subject: [PATCH 11/17] python: Set gpu device id before setting gpu mode Otherwise caffe allocates some memory on GPU#0 --- docs/tutorial/interfaces.md | 2 +- python/train.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/tutorial/interfaces.md b/docs/tutorial/interfaces.md index b5a4f1ad069..2578af5d4de 100644 --- a/docs/tutorial/interfaces.md +++ b/docs/tutorial/interfaces.md @@ -129,8 +129,8 @@ Use CPU: Use GPU and specify its gpu_id: - caffe.set_mode_gpu(); caffe.set_device(gpu_id); + caffe.set_mode_gpu(); #### Create a network and access its layers and blobs diff --git a/python/train.py b/python/train.py index 5897f5dcb90..14a38b8cef1 100644 --- a/python/train.py +++ b/python/train.py @@ -63,8 +63,8 @@ def show_time(): def solve(proto, snapshot, gpus, timing, uid, rank): - caffe.set_mode_gpu() caffe.set_device(gpus[rank]) + caffe.set_mode_gpu() caffe.set_solver_count(len(gpus)) caffe.set_solver_rank(rank) caffe.set_multiprocess(True) From 7f4f5d2563abaecb5ab983d2bac4daf21e5b3a98 Mon Sep 17 00:00:00 2001 From: Harm Berntsen Date: Mon, 18 Jan 2016 11:41:14 +0100 Subject: [PATCH 12/17] Add clip layer --- include/caffe/layers/clip_layer.hpp | 75 ++++++++++++++++++++++++++++ src/caffe/layer_factory.cpp | 1 + src/caffe/layers/clip_layer.cpp | 50 +++++++++++++++++++ src/caffe/layers/clip_layer.cu | 66 ++++++++++++++++++++++++ src/caffe/proto/caffe.proto | 9 +++- src/caffe/test/test_neuron_layer.cpp | 33 ++++++++++++ 6 files changed, 233 insertions(+), 1 deletion(-) create mode 100644 include/caffe/layers/clip_layer.hpp create mode 100644 src/caffe/layers/clip_layer.cpp create mode 100644 src/caffe/layers/clip_layer.cu diff --git a/include/caffe/layers/clip_layer.hpp b/include/caffe/layers/clip_layer.hpp new file mode 100644 index 00000000000..2788193e3ec --- /dev/null +++ b/include/caffe/layers/clip_layer.hpp @@ -0,0 +1,75 @@ +#ifndef CAFFE_CLIP_LAYER_HPP_ +#define CAFFE_CLIP_LAYER_HPP_ + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +#include "caffe/layers/neuron_layer.hpp" + +namespace caffe { + +/** + * @brief Clip: @f$ y = \max(min, \min(max, x)) @f$. 
+ */ +template <typename Dtype> +class ClipLayer : public NeuronLayer<Dtype> { + public: + /** + * @param param provides ClipParameter clip_param, + * with ClipLayer options: + * - min + * - max + */ + explicit ClipLayer(const LayerParameter& param) + : NeuronLayer<Dtype>(param) {} + + virtual inline const char* type() const { return "Clip"; } + + protected: + /** + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$ + * @param top output Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the computed outputs @f$ + * y = \max(min, \min(max, x)) + * @f$ + */ + virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, + const vector<Blob<Dtype>*>& top); + virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, + const vector<Blob<Dtype>*>& top); + + /** + * @brief Computes the error gradient w.r.t. the clipped inputs. + * + * @param top output Blob vector (length 1), providing the error gradient with + * respect to the outputs + * -# @f$ (N \times C \times H \times W) @f$ + * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ + * with respect to computed outputs @f$ y @f$ + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length 1) + * -# @f$ (N \times C \times H \times W) @f$ + * the inputs @f$ x @f$; Backward fills their diff with + * gradients @f$ + * \frac{\partial E}{\partial x} = \left\{ + * \begin{array}{lr} + * 0 & \mathrm{if} \; x < min \vee x > max \\ + * \frac{\partial E}{\partial y} & \mathrm{if} \; x \ge min \wedge x \le max + * \end{array} \right. + * @f$ + */ + virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, + const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); + virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, + const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); +}; + +} // namespace caffe + +#endif // CAFFE_CLIP_LAYER_HPP_ diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp index 9f9026b1dde..d9984431ace 100644 --- a/src/caffe/layer_factory.cpp +++ b/src/caffe/layer_factory.cpp @@ -7,6 +7,7 @@ #include "caffe/layer.hpp" #include "caffe/layer_factory.hpp" +#include "caffe/layers/clip_layer.hpp" #include "caffe/layers/conv_layer.hpp" #include "caffe/layers/deconv_layer.hpp" #include "caffe/layers/lrn_layer.hpp" diff --git a/src/caffe/layers/clip_layer.cpp b/src/caffe/layers/clip_layer.cpp new file mode 100644 index 00000000000..76387011fa3 --- /dev/null +++ b/src/caffe/layers/clip_layer.cpp @@ -0,0 +1,50 @@ +#include <algorithm> +#include <vector> +#include "caffe/layers/clip_layer.hpp" + +namespace caffe { + +template <typename Dtype> +void ClipLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, + const vector<Blob<Dtype>*>& top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + Dtype* top_data = top[0]->mutable_cpu_data(); + const int count = bottom[0]->count(); + + Dtype min = this->layer_param_.clip_param().min(); + Dtype max = this->layer_param_.clip_param().max(); + + for (int i = 0; i < count; ++i) { + top_data[i] = std::max(min, std::min(bottom_data[i], max)); + } +} + +template <typename Dtype> +void ClipLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, + const vector<bool>& propagate_down, + const vector<Blob<Dtype>*>& bottom) { + if (propagate_down[0]) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + const Dtype* top_diff = top[0]->cpu_diff(); + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + const int count = bottom[0]->count(); + + Dtype min = this->layer_param_.clip_param().min(); + Dtype max = this->layer_param_.clip_param().max(); + + for (int i = 0; i < count; ++i) { + bottom_diff[i] = top_diff[i] * ( + bottom_data[i] >= min && bottom_data[i] <= max); + }
+} + + +#ifdef CPU_ONLY +STUB_GPU(ClipLayer); +#endif + +INSTANTIATE_CLASS(ClipLayer); +REGISTER_LAYER_CLASS(Clip); + +} // namespace caffe diff --git a/src/caffe/layers/clip_layer.cu b/src/caffe/layers/clip_layer.cu new file mode 100644 index 00000000000..f780447fbcf --- /dev/null +++ b/src/caffe/layers/clip_layer.cu @@ -0,0 +1,66 @@ +#include <vector> +#include "caffe/layers/clip_layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +__global__ void ClipForward(const int n, const float* in, float* out, + float p_min, float p_max) { + CUDA_KERNEL_LOOP(index, n) { + out[index] = fmaxf(p_min, fminf(in[index], p_max)); + } +} + +__global__ void ClipForward(const int n, const double* in, double* out, + double p_min, double p_max) { + CUDA_KERNEL_LOOP(index, n) { + out[index] = fmax(p_min, fmin(in[index], p_max)); + } +} + +template <typename Dtype> +void ClipLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, + const vector<Blob<Dtype>*>& top) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + Dtype* top_data = top[0]->mutable_gpu_data(); + const int count = bottom[0]->count(); + Dtype p_min = this->layer_param_.clip_param().min(); + Dtype p_max = this->layer_param_.clip_param().max(); + // NOLINT_NEXT_LINE(whitespace/operators) + ClipForward<<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>( + count, bottom_data, top_data, p_min, p_max); + CUDA_POST_KERNEL_CHECK; +} + +template <typename Dtype> +__global__ void ClipBackward(const int n, const Dtype* in_diff, + const Dtype* in_data, Dtype* out_diff, Dtype p_min, Dtype p_max) { + CUDA_KERNEL_LOOP(index, n) { + out_diff[index] = in_diff[index] * ( + in_data[index] >= p_min && in_data[index] <= p_max); + } +} + +template <typename Dtype> +void ClipLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, + const vector<bool>& propagate_down, + const vector<Blob<Dtype>*>& bottom) { + if (propagate_down[0]) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + const Dtype* top_diff = top[0]->gpu_diff(); + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + const int count = bottom[0]->count(); + Dtype p_min = this->layer_param_.clip_param().min(); + Dtype p_max = this->layer_param_.clip_param().max(); + // NOLINT_NEXT_LINE(whitespace/operators) + ClipBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>( + count, top_diff, bottom_data, bottom_diff, p_min, p_max); + CUDA_POST_KERNEL_CHECK; + } +} + + +INSTANTIATE_LAYER_GPU_FUNCS(ClipLayer); + + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index f784aa9600c..5c235c6f87c 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -322,7 +322,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field.
// -// LayerParameter next available layer-specific ID: 148 (last added: swish_param) +// LayerParameter next available layer-specific ID: 149 (last added: clip_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -378,6 +378,7 @@ message LayerParameter { optional ArgMaxParameter argmax_param = 103; optional BatchNormParameter batch_norm_param = 139; optional BiasParameter bias_param = 141; + optional ClipParameter clip_param = 148; optional ConcatParameter concat_param = 104; optional ContrastiveLossParameter contrastive_loss_param = 105; optional ConvolutionParameter convolution_param = 106; @@ -505,6 +506,12 @@ message ArgMaxParameter { optional int32 axis = 3; } +// Message that stores parameters used by ClipLayer +message ClipParameter { + required float min = 1; + required float max = 2; +} + message ConcatParameter { // The axis along which to concatenate -- may be negative to index from the // end (e.g., -1 for the last axis). Other axes must have the diff --git a/src/caffe/test/test_neuron_layer.cpp b/src/caffe/test/test_neuron_layer.cpp index 83d80fcd895..5865e08e552 100644 --- a/src/caffe/test/test_neuron_layer.cpp +++ b/src/caffe/test/test_neuron_layer.cpp @@ -10,6 +10,7 @@ #include "caffe/layers/absval_layer.hpp" #include "caffe/layers/bnll_layer.hpp" +#include "caffe/layers/clip_layer.hpp" #include "caffe/layers/dropout_layer.hpp" #include "caffe/layers/elu_layer.hpp" #include "caffe/layers/exp_layer.hpp" @@ -206,6 +207,38 @@ TYPED_TEST(NeuronLayerTest, TestAbsGradient) { this->blob_top_vec_); } +TYPED_TEST(NeuronLayerTest, TestClip) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + CHECK(google::protobuf::TextFormat::ParseFromString( + "clip_param { min: -1, max: 2 }", &layer_param)); + ClipLayer<Dtype> layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Now, check values + const Dtype* bottom_data = this->blob_bottom_->cpu_data(); + const Dtype* top_data = this->blob_top_->cpu_data(); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_GE(top_data[i], -1); + EXPECT_LE(top_data[i], 2); + EXPECT_TRUE(bottom_data[i] > -1 || top_data[i] == -1); + EXPECT_TRUE(bottom_data[i] < 2 || top_data[i] == 2); + EXPECT_TRUE(!(bottom_data[i] >= -1 && bottom_data[i] <= 2) + || top_data[i] == bottom_data[i]); + } +} + +TYPED_TEST(NeuronLayerTest, TestClipGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + CHECK(google::protobuf::TextFormat::ParseFromString( + "clip_param { min: -1, max: 2 }", &layer_param)); + ClipLayer<Dtype> layer(layer_param); + GradientChecker<Dtype> checker(1e-2, 1e-3); + checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + TYPED_TEST(NeuronLayerTest, TestReLU) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; From b56db67b974547d0f78f9dbee097ddabdf8f0a1d Mon Sep 17 00:00:00 2001 From: Noiredd Date: Fri, 30 Mar 2018 11:43:44 +0200 Subject: [PATCH 13/17] test case fix for Clip layer gradient minor lint fixes --- src/caffe/layers/clip_layer.cpp | 1 + src/caffe/layers/clip_layer.cu | 1 + src/caffe/test/test_neuron_layer.cpp | 34 +++++++++++++++++++++++++--- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/caffe/layers/clip_layer.cpp b/src/caffe/layers/clip_layer.cpp index 76387011fa3..9d9a59673c0 100644 --- a/src/caffe/layers/clip_layer.cpp +++ b/src/caffe/layers/clip_layer.cpp @@
-1,5 +1,6 @@ #include <algorithm> #include <vector> + #include "caffe/layers/clip_layer.hpp" namespace caffe { diff --git a/src/caffe/layers/clip_layer.cu b/src/caffe/layers/clip_layer.cu index f780447fbcf..56f3be32d7d 100644 --- a/src/caffe/layers/clip_layer.cu +++ b/src/caffe/layers/clip_layer.cu @@ -1,4 +1,5 @@ #include <vector> + #include "caffe/layers/clip_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { diff --git a/src/caffe/test/test_neuron_layer.cpp b/src/caffe/test/test_neuron_layer.cpp index 5865e08e552..d1ecc37b661 100644 --- a/src/caffe/test/test_neuron_layer.cpp +++ b/src/caffe/test/test_neuron_layer.cpp @@ -234,9 +234,37 @@ TYPED_TEST(NeuronLayerTest, TestClipGradient) { CHECK(google::protobuf::TextFormat::ParseFromString( "clip_param { min: -1, max: 2 }", &layer_param)); ClipLayer<Dtype> layer(layer_param); - GradientChecker<Dtype> checker(1e-2, 1e-3); - checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_, - this->blob_top_vec_); + // Unfortunately, it might happen that an input value lands exactly within + // the discontinuity region of the Clip function. In this case the numeric + // gradient is likely to differ significantly (i.e. by a value larger than + // checker tolerance) from the computed gradient. To handle such cases, we + // eliminate such values from the input blob before the gradient check. + const Dtype epsilon = 1e-2; + const Dtype min_range_start = layer_param.clip_param().min() - epsilon; + const Dtype min_range_end = layer_param.clip_param().min() + epsilon; + const Dtype max_range_start = layer_param.clip_param().max() - epsilon; + const Dtype max_range_end = layer_param.clip_param().max() + epsilon; + // The input blob is owned by the NeuronLayerTest object, so we begin with + // creating a temporary blob and copying the input data there. + Blob<Dtype> temp_bottom; + temp_bottom.ReshapeLike(*this->blob_bottom_); + const Dtype* bottom_data = this->blob_bottom_->cpu_data(); + Dtype* temp_data_mutable = temp_bottom.mutable_cpu_data(); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + if (bottom_data[i] >= min_range_start && + bottom_data[i] <= min_range_end) { + temp_data_mutable[i] = bottom_data[i] - epsilon; + } else if (bottom_data[i] >= max_range_start && + bottom_data[i] <= max_range_end) { + temp_data_mutable[i] = bottom_data[i] + epsilon; + } else { + temp_data_mutable[i] = bottom_data[i]; + } + } + vector<Blob<Dtype>*> temp_bottom_vec; + temp_bottom_vec.push_back(&temp_bottom); + GradientChecker<Dtype> checker(epsilon, 1e-3); + checker.CheckGradientEltwise(&layer, temp_bottom_vec, this->blob_top_vec_); } TYPED_TEST(NeuronLayerTest, TestReLU) { From 4ac6443908fde20429cfc2e4dd7b9cd4696ee415 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Dolata?= Date: Fri, 17 Aug 2018 14:45:04 +0200 Subject: [PATCH 14/17] Clip layer documentation --- docs/tutorial/layers.md | 1 + docs/tutorial/layers/clip.md | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 docs/tutorial/layers/clip.md diff --git a/docs/tutorial/layers.md b/docs/tutorial/layers.md index 78a46f3a7ee..5036d4fd7c0 100644 --- a/docs/tutorial/layers.md +++ b/docs/tutorial/layers.md @@ -93,6 +93,7 @@ Layers: * [Log](layers/log.html) - f(x) = log(x). * [BNLL](layers/bnll.html) - f(x) = log(1 + exp(x)). * [Threshold](layers/threshold.html) - performs step function at user defined threshold. +* [Clip](layers/clip.html) - clips a blob between a fixed minimum and maximum value. * [Bias](layers/bias.html) - adds a bias to a blob that can either be learned or fixed.
* [Scale](layers/scale.html) - scales a blob by an amount that can either be learned or fixed. diff --git a/docs/tutorial/layers/clip.md b/docs/tutorial/layers/clip.md new file mode 100644 index 00000000000..d6a20f5f826 --- /dev/null +++ b/docs/tutorial/layers/clip.md @@ -0,0 +1,20 @@ +--- +title: Clip Layer +--- + +# Clip Layer + +* Layer type: `Clip` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1ClipLayer.html) +* Header: [`./include/caffe/layers/clip_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/clip_layer.hpp) +* CPU implementation: [`./src/caffe/layers/clip_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/clip_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/clip_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/clip_layer.cu) + +## Parameters + +* Parameters (`ClipParameter clip_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/ClipParameter.txt %} +{% endhighlight %} From d6d179a410c8e982255e2833c569cb1d465678a7 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Fri, 1 Mar 2019 21:05:47 -0400 Subject: [PATCH 15/17] Updated Intel's branch description --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fe259535865..46abdb42e90 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ and step-by-step examples. ## Custom distributions - - [Intel Caffe](https://github.com/BVLC/caffe/tree/intel) (Optimized for CPU and support for multi-node), in particular Xeon processors (HSW, BDW, SKX, Xeon Phi). + - [Intel Caffe](https://github.com/BVLC/caffe/tree/intel) (Optimized for CPU and support for multi-node), in particular Xeon processors. - [OpenCL Caffe](https://github.com/BVLC/caffe/tree/opencl) e.g. for AMD or Intel devices. - [Windows Caffe](https://github.com/BVLC/caffe/tree/windows) From 04ab089db018a292ae48d51732dd6c66766b36b6 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Fri, 1 Mar 2019 21:13:01 -0400 Subject: [PATCH 16/17] Updated Intel's branch description --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 46abdb42e90..3705c55a0a4 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ and step-by-step examples. ## Custom distributions - - [Intel Caffe](https://github.com/BVLC/caffe/tree/intel) (Optimized for CPU and support for multi-node), in particular Xeon processors. + - [Intel Caffe](https://github.com/BVLC/caffe/tree/intel) (Optimized for CPU and support for multi-node), in particular Intel® Xeon processors. - [OpenCL Caffe](https://github.com/BVLC/caffe/tree/opencl) e.g. for AMD or Intel devices. - [Windows Caffe](https://github.com/BVLC/caffe/tree/windows) From 388bf12ab7826975d95dae1074afbd77b8920600 Mon Sep 17 00:00:00 2001 From: Tim Gates Date: Thu, 12 Dec 2019 20:30:01 +1100 Subject: [PATCH 17/17] Fix simple typo: overrided -> overridden Closes #6877 --- scripts/cpp_lint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cpp_lint.py b/scripts/cpp_lint.py index b2016d4b6dd..fb44026718e 100755 --- a/scripts/cpp_lint.py +++ b/scripts/cpp_lint.py @@ -211,7 +211,7 @@ 'whitespace/todo' ] -# The default state of the category filter. This is overrided by the --filter= +# The default state of the category filter. This is overridden by the --filter= # flag. 
By default all errors are on, so only add here categories that should be # off by default (i.e., categories that must be enabled by the --filter= flags). # All entries here should start with a '-' or '+', as in the --filter= flag.