PaddlePaddle · reyoung · Dec 8, 2016 · Nov 30, 2016 · Nov 30, 2016 · Nov 30, 2016
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -195,3 +195,7 @@ if(WITH_DOC)
     add_subdirectory(doc)
     add_subdirectory(doc_cn)
 endif()
+
+# Optional OpenCV plugin: the plugin/opencv subdirectory is added to the build
+# only when USE_OPENCV is set.
+if(USE_OPENCV)
+    add_subdirectory(plugin/opencv)
+endif()
diff --git a/plugin/opencv/CMakeLists.txt b/plugin/opencv/CMakeLists.txt
@@ -0,0 +1,44 @@
+# Build script for the OpenCV plugin.
+#
+# Produces the shared library `DeJpeg` (without the usual `lib` prefix) from
+# the C++ sources in this directory and links it against OpenCV and
+# Boost.Python.
+
+project(DeJpeg CXX C)
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
+set(PROJ_ROOT ${CMAKE_SOURCE_DIR})
+list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
+set(DEJPEG_LINKER_LIBS "")
+
+# opencv
+find_package(OpenCV REQUIRED COMPONENTS core highgui imgproc)
+include_directories(${OpenCV_INCLUDE_DIRS})
+list(APPEND DEJPEG_LINKER_LIBS ${OpenCV_LIBS})
+message(STATUS "OpenCV found (${OpenCV_CONFIG_PATH})")
+add_definitions(-DUSE_OPENCV)
+
+# boost-python
+# When the BOOST_ROOT environment variable is set, restrict the search to that
+# installation; otherwise fall back to the default system search paths instead
+# of searching nowhere.
+if (NOT "$ENV{BOOST_ROOT}" STREQUAL "")
+  set(BOOST_ROOT $ENV{BOOST_ROOT})
+  set(Boost_NO_SYSTEM_PATHS ON)
+endif ()
+# REQUIRED makes a missing Boost.Python fail at configure time rather than
+# later at compile/link time.
+find_package(Boost 1.46 REQUIRED COMPONENTS python)
+include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
+# The linker search path must point at the library directories, not at the
+# include directory.
+link_directories(${Boost_LIBRARY_DIRS})
+message(STATUS "Boost found (${Boost_INCLUDE_DIRS})")
+message(STATUS "Boost found (${Boost_LIBRARIES})")
+list(APPEND DEJPEG_LINKER_LIBS ${Boost_LIBRARIES})
+
+
+# Each globbing expression must be a single "<dir>/<pattern>" string; passing
+# the directory and the pattern as separate arguments also matches the
+# directory itself.
+file(GLOB DEJPEG_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/*.h")
+file(GLOB DEJPEG_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
+
+# Warning flags applied to this target only.
+set(BUILD_PRIVATE_FLAGS
+    -Wno-all
+    -Wno-error
+    -Wno-non-virtual-dtor
+    -Wno-delete-non-virtual-dtor)
+
+add_library(DeJpeg SHARED ${DEJPEG_SOURCES} ${DEJPEG_HEADER})
+target_compile_options(DeJpeg BEFORE PRIVATE ${BUILD_PRIVATE_FLAGS})
+target_link_libraries(DeJpeg ${DEJPEG_LINKER_LIBS})
+# Empty prefix: the output is named DeJpeg.<ext> rather than libDeJpeg.<ext>.
+set_target_properties(DeJpeg PROPERTIES PREFIX "")
diff --git a/plugin/opencv/DataTransformer.cpp b/plugin/opencv/DataTransformer.cpp
@@ -0,0 +1,179 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "DataTransformer.h"
+#include <time.h>
+#include <limits>
+
+/**
+ * Build a transformer: store the configuration, copy the optional mean,
+ * allocate the pool of prefetch buffers and create the worker thread pool.
+ *
+ * @param threadNum     Number of threads given to the SyncThreadPool.
+ * @param capacity      Number of prefetch buffers; also the capacity passed
+ *                      to both blocking queues.
+ * @param isTest        Test mode flag (centre crop, no mirroring).
+ * @param isColor       True for 3-channel images, false for 1-channel.
+ * @param cropHeight    Height of the cropped output.
+ * @param cropWidth     Width of the cropped output.
+ * @param imgSize       Target length of the shorter image side when
+ *                      resizing; values <= 0 disable resizing.
+ * @param isEltMean     Subtract an element-wise mean.
+ * @param isChannelMean Subtract a per-channel mean.
+ * @param meanValues    Mean values to copy, or null when no mean is used.
+ */
+DataTransformer::DataTransformer(int threadNum,
+                                 int capacity,
+                                 bool isTest,
+                                 bool isColor,
+                                 int cropHeight,
+                                 int cropWidth,
+                                 int imgSize,
+                                 bool isEltMean,
+                                 bool isChannelMean,
+                                 float* meanValues)
+    : isTest_(isTest),
+      isColor_(isColor),
+      cropHeight_(cropHeight),
+      cropWidth_(cropWidth),
+      imgSize_(imgSize),
+      capacity_(capacity),
+      fetchCount_(-1),
+      isEltMean_(isEltMean),
+      isChannelMean_(isChannelMean),
+      numThreads_(threadNum),
+      scale_(1.0f),
+      imgPixels_(cropHeight * cropWidth * (isColor ? 3 : 1)),
+      // Must be null before loadMean(): that function only assigns the
+      // pointer when a mean is supplied, and the destructor tests it.
+      meanValues_(nullptr),
+      prefetchFree_(capacity),
+      prefetchFull_(capacity) {
+  loadMean(meanValues);
+
+  prefetch_.reserve(capacity);
+  for (int i = 0; i < capacity; i++) {
+    // The float buffer is owned by the shared_ptr through a custom deleter so
+    // it is released together with the pair. The trailing () value-initialises
+    // the whole buffer to zero.
+    DataTypePtr d(new DataType(new float[imgPixels_ * 3](), 0),
+                  [](DataType* p) {
+                    delete[] p->first;
+                    delete p;
+                  });
+    prefetch_.push_back(d);
+    prefetchFree_.enqueue(d);
+  }
+
+  syncThreadPool_.reset(new SyncThreadPool(numThreads_, false));
+}
+
+/**
+ * Copy the caller's mean values into a buffer owned by this object.
+ *
+ * The number of floats copied is the channel count (3 for colour, 1
+ * otherwise) when isChannelMean_ is set, and
+ * cropHeight_ * cropWidth_ * channels otherwise.
+ *
+ * @param values Source mean values, or null when no mean is used.
+ */
+void DataTransformer::loadMean(float* values) {
+  if (!values) {
+    // Leave the pointer in a well-defined state so that later null checks
+    // (e.g. in the destructor) do not read an indeterminate value.
+    meanValues_ = nullptr;
+    return;
+  }
+  const int channels = isColor_ ? 3 : 1;
+  const int count =
+      isChannelMean_ ? channels : cropHeight_ * cropWidth_ * channels;
+  meanValues_ = new float[count];
+  memcpy(meanValues_, values, count * sizeof(float));
+}
+
+/**
+ * Decode an encoded image held in memory and run transform() on it.
+ *
+ * If the input is null/empty or cannot be decoded, the error is logged,
+ * the first imgPixels_ floats of `trg` are set to zero and transform() is
+ * not called (it cannot operate on an empty image).
+ *
+ * @param src  Encoded image bytes.
+ * @param size Number of bytes in `src`.
+ * @param trg  Output buffer that receives the transformed data.
+ */
+void DataTransformer::startFetching(const char* src,
+                                    const int size,
+                                    float* trg) {
+  cv::Mat im;
+  if (src && size > 0) {
+    std::vector<char> imbuf(src, src + size);
+    int cvFlag = (isColor_ ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
+    im = cv::imdecode(cv::Mat(imbuf), cvFlag);
+  }
+  if (!im.data) {
+    LOG(ERROR) << "Could not decode image";
+    LOG(ERROR) << im.channels() << " " << im.rows << " " << im.cols;
+    // Hand back a defined (all-zero) result instead of stale buffer contents.
+    std::fill_n(trg, imgPixels_, 0.0f);
+    return;
+  }
+  this->transform(im, trg);
+}
+
+/**
+ * Draw a uniformly distributed integer from the closed range [min, max].
+ *
+ * @param min Lower bound (inclusive).
+ * @param max Upper bound (inclusive).
+ * @return A random value in [min, max]; `min` when the range is empty or
+ *         holds a single value.
+ */
+int DataTransformer::Rand(int min, int max) {
+  // uniform_int_distribution requires min <= max.
+  if (max <= min) {
+    return min;
+  }
+  // One engine per thread, seeded once, instead of constructing a
+  // random_device and re-seeding a Mersenne Twister on every call.
+  static thread_local std::mt19937 rng{std::random_device{}()};
+  std::uniform_int_distribution<int> dist(min, max);
+  return dist(rng);
+}
+
+/**
+ * Resize (optionally), crop, optionally mirror and mean-normalise one image,
+ * writing the result to `target` as planar float data.
+ *
+ * Steps:
+ *  1. When imgSize_ > 0 the image is resized so that its shorter side equals
+ *     imgSize_ and the longer side follows the aspect ratio.
+ *  2. When both crop dimensions are non-zero a cropWidth_ x cropHeight_
+ *     window is taken: at a random offset when not in test mode, centred in
+ *     test mode.
+ *  3. Each sample is read as an 8-bit unsigned value, converted to float,
+ *     has the element-wise or per-channel mean subtracted when enabled, is
+ *     multiplied by scale_, and is stored at (c * H + h) * W + w. When not in
+ *     test mode the image is mirrored horizontally with probability 1/2.
+ *
+ * @param cvImgOri Input image.
+ * @param target   Output buffer; cropHeight_ * cropWidth_ * channels floats
+ *                 are written.
+ */
+void DataTransformer::transform(Mat& cvImgOri, float* target) {
+  if (cvImgOri.empty()) {
+    // Without this guard the aspect-ratio computation divides by zero.
+    LOG(ERROR) << "Cannot transform an empty image";
+    return;
+  }
+  const int imgChannels = cvImgOri.channels();
+  const int imgHeight = cvImgOri.rows;
+  const int imgWidth = cvImgOri.cols;
+  const bool doMirror = (!isTest_) && Rand(0, 1);
+  int h_off = 0;
+  int w_off = 0;
+  int th = imgHeight;
+  int tw = imgWidth;
+  cv::Mat img;
+  if (imgSize_ > 0) {
+    if (imgHeight > imgWidth) {
+      tw = imgSize_;
+      th = int(double(imgHeight) / imgWidth * tw);
+      th = th > imgSize_ ? th : imgSize_;
+    } else {
+      th = imgSize_;
+      tw = int(double(imgWidth) / imgHeight * th);
+      tw = tw > imgSize_ ? tw : imgSize_;
+    }
+    cv::resize(cvImgOri, img, cv::Size(tw, th));
+  } else {
+    // Assign to the outer `img`; declaring a new local here would leave the
+    // outer one empty.
+    img = cvImgOri;
+  }
+
+  cv::Mat cv_cropped_img = img;
+  if (cropHeight_ && cropWidth_) {
+    if (th < cropHeight_ || tw < cropWidth_) {
+      LOG(FATAL) << "Crop size " << cropHeight_ << "x" << cropWidth_
+                 << " is larger than the image size " << th << "x" << tw;
+    }
+    if (!isTest_) {
+      h_off = Rand(0, th - cropHeight_);
+      w_off = Rand(0, tw - cropWidth_);
+    } else {
+      h_off = (th - cropHeight_) / 2;
+      w_off = (tw - cropWidth_) / 2;
+    }
+    cv::Rect roi(w_off, h_off, cropWidth_, cropHeight_);
+    cv_cropped_img = img(roi);
+  } else {
+    // NOTE(review): this branch is reached only when cropHeight_ or
+    // cropWidth_ is 0, so these checks can pass only if the matching image
+    // dimension is 0 as well.
+    CHECK_EQ(cropHeight_, imgHeight);
+    CHECK_EQ(cropWidth_, imgWidth);
+  }
+  const int height = cropHeight_;
+  const int width = cropWidth_;
+  for (int h = 0; h < height; ++h) {
+    const uchar* ptr = cv_cropped_img.ptr<uchar>(h);
+    int img_index = 0;
+    for (int w = 0; w < width; ++w) {
+      // Mirroring flips the destination column only.
+      const int dst_w = doMirror ? (width - 1 - w) : w;
+      for (int c = 0; c < imgChannels; ++c) {
+        const int top_index = (c * height + h) * width + dst_w;
+        const float pixel = static_cast<float>(ptr[img_index++]);
+        if (isEltMean_) {
+          // The element-wise mean buffer is sized by the crop dimensions
+          // (see loadMean), so it is indexed with them as well.
+          const int mean_index = (c * height + h) * width + w;
+          target[top_index] = (pixel - meanValues_[mean_index]) * scale_;
+        } else if (isChannelMean_) {
+          target[top_index] = (pixel - meanValues_[c]) * scale_;
+        } else {
+          target[top_index] = pixel * scale_;
+        }
+      }
+    }
+  }  // target: BGR
+}
+
+/**
+ * Transform a batch of encoded images on the worker thread pool.
+ *
+ * Thread `tid` handles items tid, tid + numThreads, ... Each item takes a
+ * buffer from prefetchFree_, stores its label, decodes and transforms the
+ * image into the buffer and pushes it to prefetchFull_. fetchCount_ is set
+ * to the batch size once the pool has finished.
+ *
+ * @param data    Pointers to the encoded image bytes, one per item.
+ * @param datalen Byte length of each entry in `data`.
+ * @param labels  Label of each entry in `data`.
+ */
+void DataTransformer::start(vector<char*>& data, int* datalen, int* labels) {
+  // Every item needs its own buffer and nothing returns buffers to
+  // prefetchFree_ while the batch runs, so a batch larger than the pool
+  // cannot complete (presumably the workers would block in dequeue()).
+  if (data.size() > static_cast<size_t>(capacity_)) {
+    LOG(FATAL) << "Batch size " << data.size()
+               << " exceeds the prefetch capacity " << capacity_;
+  }
+  auto job = [&](int tid, int numThreads) {
+    for (size_t i = tid; i < data.size(); i += numThreads) {
+      DataTypePtr ret = prefetchFree_.dequeue();
+      ret->second = labels[i];
+      this->startFetching(data[i], datalen[i], ret->first);
+      prefetchFull_.enqueue(ret);
+    }
+  };
+  syncThreadPool_->exec(job);
+  fetchCount_ = data.size();
+}
+
+/**
+ * Pop one transformed item from prefetchFull_ and copy it out.
+ *
+ * Logs a fatal error when fetchCount_ drops below zero, i.e. when more
+ * items are requested than the last start() call recorded. The buffer is
+ * handed back to prefetchFree_ afterwards.
+ *
+ * @param data  Destination for imgPixels_ floats.
+ * @param label Receives the label stored with the item.
+ */
+void DataTransformer::obtain(float* data, int* label) {
+  if (--fetchCount_ < 0) {
+    LOG(FATAL) << "Empty data";
+  }
+  DataTypePtr item = prefetchFull_.dequeue();
+  *label = item->second;
+  const size_t numBytes = sizeof(float) * imgPixels_;
+  memcpy(data, item->first, numBytes);
+  prefetchFree_.enqueue(item);
+}
diff --git a/plugin/opencv/DataTransformer.h b/plugin/opencv/DataTransformer.h
@@ -0,0 +1,121 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include <fstream>
+#include <opencv2/opencv.hpp>
+#include <vector>
+#include <string>
+#include <algorithm>
+
+#include "paddle/utils/Thread.h"
+
+using namespace cv;
+using namespace paddle;
+
+/**
+ * This is an image processing module built on OpenCV. It supports
+ * resizing, scaling, mirroring, subtracting the image mean...
+ *
+ * The class holds two blocking queues (free and full) that share one set of
+ * pre-allocated buffers, which avoids allocating memory for every image.
+ * Images are transformed by multiple threads and the results are handed back
+ * one at a time through obtain().
+ */
+class DataTransformer {
+public:
+  /**
+   * @param threadNum     Number of worker threads.
+   * @param capacity      Number of prefetch buffers / queue capacity.
+   * @param isTest        Test mode flag.
+   * @param isColor       True for colour images, false for greyscale.
+   * @param cropHeight    Height of the cropped output.
+   * @param cropWidth     Width of the cropped output.
+   * @param imgSize       Resize target; values <= 0 disable resizing.
+   * @param isEltMean     Subtract an element-wise mean.
+   * @param isChannelMean Subtract a per-channel mean.
+   * @param meanValues    Mean values (copied), or null.
+   */
+  DataTransformer(int threadNum,
+                  int capacity,
+                  bool isTest,
+                  bool isColor,
+                  int cropHeight,
+                  int cropWidth,
+                  int imgSize,
+                  bool isEltMean,
+                  bool isChannelMean,
+                  float* meanValues);
+
+  // The object owns a raw buffer (meanValues_), so copying is disabled.
+  DataTransformer(const DataTransformer&) = delete;
+  DataTransformer& operator=(const DataTransformer&) = delete;
+
+  virtual ~DataTransformer() {
+    // meanValues_ is allocated with new[] in loadMean(), so it must be
+    // released with delete[] (not free()). delete[] on null is a no-op.
+    delete[] meanValues_;
+  }
+
+  /**
+   * @brief Start multi-threads to transform a list of input data.
+   * The processed data will be saved in Queue of prefetchFull_.
+   *
+   * @param data    Data containing the image strings to be transformed.
+   * @param datalen The length of each image string in data.
+   * @param labels  The label of each input image.
+   */
+  void start(vector<char*>& data, int* datalen, int* labels);
+
+  /**
+   * @brief Applies the transformation on one image Mat.
+   *
+   * @param img    The input img to be transformed.
+   * @param target target is used to save the transformed data.
+   */
+  void transform(Mat& img, float* target);
+
+  /**
+   * @brief Decode the image string, then calls transform() function.
+   *
+   * @param src  The input image string.
+   * @param size The length of string.
+   * @param trg  trg is used to save the transformed data.
+   */
+  void startFetching(const char* src, const int size, float* trg);
+
+  /**
+   * @brief Return the transformed data and its label.
+   *
+   * @param data  Receives the transformed data.
+   * @param label Receives the label.
+   */
+  void obtain(float* data, int* label);
+
+private:
+  bool isTest_;
+  bool isColor_;
+  int cropHeight_;
+  int cropWidth_;
+  int imgSize_;
+  int capacity_;
+  int fetchCount_;
+  bool isEltMean_;
+  bool isChannelMean_;
+  int numThreads_;
+  float scale_;
+  int imgPixels_;
+  // Null unless loadMean() received mean values; the default keeps the
+  // destructor safe when no mean is supplied.
+  float* meanValues_ = nullptr;
+
+  /**
+   * Initialize the mean values.
+   */
+  void loadMean(float* values);
+
+  /**
+   * @brief Generates a random integer from Uniform({min, min + 1, ..., max}).
+   * @param min The lower bound (inclusive) value of the random number.
+   * @param max The upper bound (inclusive) value of the random number.
+   *
+   * @return
+   * A uniformly random integer value from ({min, min + 1, ..., max}).
+   */
+  int Rand(int min, int max);
+
+  // A buffer of transformed floats paired with the image label.
+  typedef std::pair<float*, int> DataType;
+  typedef std::shared_ptr<DataType> DataTypePtr;
+  std::vector<DataTypePtr> prefetch_;
+  std::unique_ptr<SyncThreadPool> syncThreadPool_;
+  BlockingQueue<DataTypePtr> prefetchFree_;
+  BlockingQueue<DataTypePtr> prefetchFull_;
+
+};  // class DataTransformer