sadeepj · heyitsguay · Aug 7, 2018 · Aug 7, 2018 · Aug 23, 2018 · Aug 23, 2018
diff --git a/.gitattributes b/.gitattributes
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 *.pyc
-src/cpp/high_dim_filter.so
+*.so
+crfrnn_keras_model.h5
diff --git a/LICENSE b/LICENSE
diff --git a/README.md b/README.md
@@ -1,4 +1,6 @@
 # CRF-RNN for Semantic Image Segmentation - Keras/Tensorflow version
+## Forked from [sadeepj/crfasrnn_keras](https://github.com/sadeepj/crfasrnn_keras).
+#### Credit for all content below due to sadeepj/crfasrnn_keras contributors
 ![sample](sample.png)
 
 <b>Live demo:</b> &nbsp;&nbsp;&nbsp;&nbsp; [http://crfasrnn.torr.vision](http://crfasrnn.torr.vision) <br/>

diff --git a/download_model_weights.sh b/download_model_weights.sh
@@ -0,0 +1 @@
+wget https://github.com/sadeepj/crfasrnn_keras/releases/download/v1.0/crfrnn_keras_model.h5
diff --git a/image.jpg b/image.jpg
diff --git a/labels.png b/labels.png
diff --git a/requirements.txt b/requirements.txt
diff --git a/requirements_gpu.txt b/requirements_gpu.txt
diff --git a/run_demo.py b/run_demo.py
@@ -23,7 +23,7 @@
 """
 
 import sys
-sys.path.insert(1, './src')
+sys.path.insert(1, './src/python')
 from crfrnn_model import get_crfrnn_model_def
 import util
 

diff --git a/sample.png b/sample.png
diff --git a/src/cpp/Makefile b/src/cpp/Makefile
@@ -12,13 +12,15 @@
 
 # Define the compiler
 CC := g++
+# Define the target python version
+PYTHON := python3.6
 
 # Read Tensorflow paths
-TF_INC := $(shell python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
-TF_LIB := $(shell python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
+TF_INC := $(shell $(PYTHON) -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
+TF_LIB := $(shell $(PYTHON) -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
 
 # Is the Tensorflow version >= 1.4?
-TF_VERSION_GTE_1_4 := $(shell expr `python -c 'import tensorflow as tf; print(tf.__version__)' | cut -f1,2 -d.` \>= 1.4)
+TF_VERSION_GTE_1_4 := $(shell expr `$(PYTHON) -c 'import tensorflow as tf; print(tf.__version__)' | cut -f1,2 -d.` \>= 1.4)
 
 # Flags required for all cases
 CFLAGS := -std=c++11 -D_GLIBCXX_USE_CXX11_ABI=0 -shared -fPIC -I$(TF_INC) -O2 
@@ -40,9 +42,9 @@ endif
 .PHONY: all clean
 
 high_dim_filter.so: high_dim_filter.cc modified_permutohedral.cc
-	$(CC) $(CFLAGS) -o high_dim_filter.so high_dim_filter.cc modified_permutohedral.cc $(LDFLAGS)
+	$(CC) $(CFLAGS) -o build/high_dim_filter.so high_dim_filter.cc modified_permutohedral.cc $(LDFLAGS)
 
 clean:
-	$(RM) high_dim_filter.so
+	$(RM) $(OUTDIR)/high_dim_filter.so
 
 all: high_dim_filter.so
diff --git a/src/cpp/high_dim_filter.cc b/src/cpp/high_dim_filter.cc
@@ -30,45 +30,99 @@
 
 using namespace tensorflow;
 
-void compute_spatial_kernel(float * const output_kernel, const int width,
-                            const int height, const float theta_gamma) {
+void compute_spatial_kernel(float * const output_kernel,
+                            const int width,
+                            const int height,
+                            const float theta_gamma) {
 
-  const int num_pixels = width * height;
-  for (int p = 0; p < num_pixels; ++p) {
-    output_kernel[2 * p] = static_cast<float>(p % width) / theta_gamma;
-    output_kernel[2 * p + 1] = static_cast<float>(p / width) / theta_gamma;
-  }
+    const int num_pixels = width * height;
+    for (int p = 0; p < num_pixels; ++p) {
+        output_kernel[2 * p] = static_cast<float>(p % width) / theta_gamma;
+        output_kernel[2 * p + 1] = static_cast<float>(p / width) / theta_gamma;
+    }
 }
 
-void compute_bilateral_kernel(float * const output_kernel, const Tensor& rgb_tensor,
-                              const float theta_alpha, const float theta_beta) {
-
-  const int height = rgb_tensor.dim_size(1);
-  const int width = rgb_tensor.dim_size(2);
-  const int num_pixels = height * width;
-  auto rgb = rgb_tensor.flat<float>();
+void compute_spatial_kernel_3d(float * const output_kernel,
+                               const int width,
+                               const int height,
+                               const int depth,
+                               const float theta_gamma,
+                               const float theta_gamma_z) {
+    const int hw = height * width;
+    const int num_voxels = depth * height * width;
+    for (int p = 0; p < num_voxels; ++p) {
+        output_kernel[3 * p] = static_cast<float>(p % width) / theta_gamma;
+        output_kernel[3 * p + 1] = static_cast<float>(p / width) / theta_gamma;
+        output_kernel[3 * p + 2] = static_cast<float>(p / hw) / theta_gamma_z;
+    }
+}
 
-  for (int p = 0; p < num_pixels; ++p) {
-    // Spatial terms
-    output_kernel[5 * p] = static_cast<float>(p % width) / theta_alpha;
-    output_kernel[5 * p + 1] = static_cast<float>(p / width) / theta_alpha;
+void compute_bilateral_kernel(float * const output_kernel,
+                              const Tensor& image_tensor,
+                              const float theta_alpha,
+                              const float theta_beta) {
+
+    const int unary_channels = image_tensor.dim_size(0);
+    const int height = image_tensor.dim_size(1);
+    const int width = image_tensor.dim_size(2);
+    const int num_pixels = height * width;
+    auto rgb = image_tensor.flat<float>();
+
+    // Number of output unary_channels: rgb unary_channels plus two spatial (x, y) unary_channels
+    const int oc = unary_channels + 2;
+    for (int p = 0; p < num_pixels; ++p) {
+        // Spatial terms
+        output_kernel[oc * p] = static_cast<float>(p % width) / theta_alpha;
+        output_kernel[oc * p + 1] = static_cast<float>(p / width) / theta_alpha;
+
+        // Color channel terms
+        for (int i = 0; i < unary_channels; ++i) {
+            output_kernel[oc * p + i + 2] =
+                    static_cast<float>(rgb(p + i * num_pixels) / theta_beta);
+        }
+    }
+}
 
-    // Color terms
-    output_kernel[5 * p + 2] = static_cast<float>(rgb(p) / theta_beta);
-    output_kernel[5 * p + 3] = static_cast<float>(rgb(num_pixels + p) / theta_beta);
-    output_kernel[5 * p + 4] = static_cast<float>(rgb(2 * num_pixels + p) / theta_beta);
-  }
+void compute_bilateral_kernel_3d(float * const output_kernel,
+                                 const Tensor& image_tensor,
+                                 const float theta_alpha,
+                                 const float theta_alpha_z,
+                                 const float theta_beta) {
+    const int unary_channels = image_tensor.dim_size(0);
+    const int depth = image_tensor.dim_size(1);
+    const int height = image_tensor.dim_size(2);
+    const int width = image_tensor.dim_size(3);
+    const int hw = height * width;
+    const int num_pixels = depth * height * width;
+
+    auto rgb = image_tensor.flat<float>();
+
+    const int oc = unary_channels + 3;
+    for (int p = 0; p < num_pixels; ++p) {
+        output_kernel[oc * p] = static_cast<float>(p % width) / theta_alpha;
+        output_kernel[oc * p + 1] = static_cast<float>(p / width) / theta_alpha;
+        output_kernel[oc * p + 2] = static_cast<float>(p / hw) / theta_alpha_z;
+
+        // Color channel terms
+        for (int i = 0; i < unary_channels; ++i) {
+            output_kernel[oc * p + i + 3] =
+                    static_cast<float>(rgb(p + i * num_pixels)) / theta_beta;
+        }
+    }
 }
 
 REGISTER_OP("HighDimFilter")
+    .Attr("T: {float}")
     .Attr("bilateral: bool")
     .Attr("theta_alpha: float = 1.0")
+    .Attr("theta_alpha_z: float = 1.0")
     .Attr("theta_beta: float = 1.0")
     .Attr("theta_gamma: float = 1.0")
+    .Attr("theta_gamma_z: float = 1.0")
     .Attr("backwards: bool = false")
-    .Input("raw: float32")
-    .Input("rgb: float32")
-    .Output("filtered: float32")
+    .Input("raw: T")
+    .Input("rgb: T")
+    .Output("filtered: T")
     .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
       c->set_output(0, c->input(0));
       return Status::OK();
@@ -82,46 +136,76 @@ class HighDimFilterOp : public OpKernel {
                    context->GetAttr("bilateral", &bilateral_));
     OP_REQUIRES_OK(context,
                    context->GetAttr("theta_alpha", &theta_alpha_));
+    OP_REQUIRES_OK(context,
+                   context->GetAttr("theta_alpha_z", &theta_alpha_z_));
     OP_REQUIRES_OK(context,
                    context->GetAttr("theta_beta", &theta_beta_));
     OP_REQUIRES_OK(context,
                    context->GetAttr("theta_gamma", &theta_gamma_));
+    OP_REQUIRES_OK(context,
+                   context->GetAttr("theta_gamma_z", &theta_gamma_z_));
     OP_REQUIRES_OK(context,
                    context->GetAttr("backwards", &backwards_));
   }
 
   void Compute(OpKernelContext* context) override {
 
     // Grab the unary tensor
-    const Tensor& input_tensor = context->input(0);
+    const Tensor& unary_tensor = context->input(0);
     // Grab the RGB image tensor
     const Tensor& image_tensor = context->input(1);
-
-    const int channels = input_tensor.dim_size(0);
-    const int height = input_tensor.dim_size(1);
-    const int width = input_tensor.dim_size(2);
-    const int num_pixels = width * height;
+
+    const int spatial_dims = image_tensor.dims() - 1;
+    const bool is_3d = spatial_dims == 3;
+
+    const int image_channels = image_tensor.dim_size(0);
+    const int bilateral_channels = image_channels + spatial_dims;
+    const int unary_channels = unary_tensor.dim_size(0);
+    const int depth = is_3d ? image_tensor.dim_size(1) : 1;
+    const int height = image_tensor.dim_size(spatial_dims - 1);
+    const int width = image_tensor.dim_size(spatial_dims);
+    const int num_pixels = width * height * depth;
 
     // Create the output tensor
     Tensor* output_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
+    OP_REQUIRES_OK(context, context->allocate_output(0, unary_tensor.shape(),
                                                      &output_tensor));
     ModifiedPermutohedral mp;
 
     if (bilateral_) {
-      float * const kernel_vals = new float[5 * num_pixels];
-      compute_bilateral_kernel(kernel_vals, image_tensor,
-                               theta_alpha_, theta_beta_);
-      mp.init(kernel_vals, 5, num_pixels);
-      mp.compute(*output_tensor, input_tensor, channels, backwards_);
-
+      float * const kernel_vals = new float[bilateral_channels * num_pixels];
+      if (is_3d) {
+        compute_bilateral_kernel_3d(kernel_vals,
+                                    image_tensor,
+                                    theta_alpha_,
+                                    theta_alpha_z_,
+                                    theta_beta_);
+      } else {
+        compute_bilateral_kernel(kernel_vals,
+                                 image_tensor,
+                                 theta_alpha_,
+                                 theta_beta_);
+      }
+      mp.init(kernel_vals, bilateral_channels, num_pixels);
+      mp.compute(*output_tensor, unary_tensor, unary_channels, backwards_);
       delete[] kernel_vals;
     } else {
-      float * const kernel_vals = new float[2 * num_pixels];
-      compute_spatial_kernel(kernel_vals, width, height, theta_gamma_);
-      mp.init(kernel_vals, 2, num_pixels);
-      mp.compute(*output_tensor, input_tensor, channels, backwards_);
-
+      float * const kernel_vals = new float[spatial_dims * num_pixels];
+      if (is_3d) {
+        compute_spatial_kernel_3d(kernel_vals,
+                                  width,
+                                  height,
+                                  depth,
+                                  theta_gamma_,
+                                  theta_gamma_z_);
+      } else {
+        compute_spatial_kernel(kernel_vals,
+                               width,
+                               height,
+                               theta_gamma_);
+      }
+      mp.init(kernel_vals, spatial_dims, num_pixels);
+      mp.compute(*output_tensor, unary_tensor, unary_channels, backwards_);
       delete[] kernel_vals;
     }
 
@@ -130,8 +214,10 @@ class HighDimFilterOp : public OpKernel {
  private:
   bool bilateral_;
   float theta_alpha_;
+  float theta_alpha_z_;
   float theta_beta_;
   float theta_gamma_;
+  float theta_gamma_z_;
   bool backwards_;
 };
 

diff --git a/src/cpp/modified_permutohedral.cc b/src/cpp/modified_permutohedral.cc
diff --git a/src/cpp/modified_permutohedral.h b/src/cpp/modified_permutohedral.h
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		wget https://github.com/sadeepj/crfasrnn_keras/releases/download/v1.0/crfrnn_keras_model.h5