Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for 3D spatial data and arbitrary number of image channels #41

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file modified .gitattributes
100644 → 100755
Empty file.
3 changes: 2 additions & 1 deletion .gitignore
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
*.pyc
src/cpp/high_dim_filter.so
*.so
crfrnn_keras_model.h5
Empty file modified LICENSE
100644 → 100755
Empty file.
2 changes: 2 additions & 0 deletions README.md
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# CRF-RNN for Semantic Image Segmentation - Keras/Tensorflow version
## Forked from [sadeepj/crfasrnn_keras](https://github.com/sadeepj/crfasrnn_keras).
#### Credit for all content below due to sadeepj/crfasrnn_keras contributors
![sample](sample.png)

<b>Live demo:</b> &nbsp;&nbsp;&nbsp;&nbsp; [http://crfasrnn.torr.vision](http://crfasrnn.torr.vision) <br/>
Expand Down
1 change: 1 addition & 0 deletions download_model_weights.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
wget https://github.com/sadeepj/crfasrnn_keras/releases/download/v1.0/crfrnn_keras_model.h5
Empty file modified image.jpg
100644 → 100755
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added labels.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Empty file modified requirements.txt
100644 → 100755
Empty file.
Empty file modified requirements_gpu.txt
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion run_demo.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"""

import sys
sys.path.insert(1, './src')
sys.path.insert(1, './src/python')
from crfrnn_model import get_crfrnn_model_def
import util

Expand Down
Empty file modified sample.png
100644 → 100755
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
12 changes: 7 additions & 5 deletions src/cpp/Makefile
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@

# Define the compiler
CC := g++
# Define the target python version
PYTHON := python3.6

# Read Tensorflow paths
TF_INC := $(shell python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
TF_LIB := $(shell python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
TF_INC := $(shell $(PYTHON) -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
TF_LIB := $(shell $(PYTHON) -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')

# Is the Tensorflow version >= 1.4?
TF_VERSION_GTE_1_4 := $(shell expr `python -c 'import tensorflow as tf; print(tf.__version__)' | cut -f1,2 -d.` \>= 1.4)
TF_VERSION_GTE_1_4 := $(shell expr `$(PYTHON) -c 'import tensorflow as tf; print(tf.__version__)' | cut -f1,2 -d.` \>= 1.4)

# Flags required for all cases
CFLAGS := -std=c++11 -D_GLIBCXX_USE_CXX11_ABI=0 -shared -fPIC -I$(TF_INC) -O2
Expand All @@ -40,9 +42,9 @@ endif
.PHONY: all clean

high_dim_filter.so: high_dim_filter.cc modified_permutohedral.cc
$(CC) $(CFLAGS) -o high_dim_filter.so high_dim_filter.cc modified_permutohedral.cc $(LDFLAGS)
$(CC) $(CFLAGS) -o build/high_dim_filter.so high_dim_filter.cc modified_permutohedral.cc $(LDFLAGS)

clean:
$(RM) high_dim_filter.so
$(RM) $(OUTDIR)/high_dim_filter.so

all: high_dim_filter.so
174 changes: 130 additions & 44 deletions src/cpp/high_dim_filter.cc
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -30,45 +30,99 @@

using namespace tensorflow;

void compute_spatial_kernel(float * const output_kernel, const int width,
const int height, const float theta_gamma) {
void compute_spatial_kernel(float * const output_kernel,
const int width,
const int height,
const float theta_gamma) {

const int num_pixels = width * height;
for (int p = 0; p < num_pixels; ++p) {
output_kernel[2 * p] = static_cast<float>(p % width) / theta_gamma;
output_kernel[2 * p + 1] = static_cast<float>(p / width) / theta_gamma;
}
const int num_pixels = width * height;
for (int p = 0; p < num_pixels; ++p) {
output_kernel[2 * p] = static_cast<float>(p % width) / theta_gamma;
output_kernel[2 * p + 1] = static_cast<float>(p / width) / theta_gamma;
}
}

void compute_bilateral_kernel(float * const output_kernel, const Tensor& rgb_tensor,
const float theta_alpha, const float theta_beta) {

const int height = rgb_tensor.dim_size(1);
const int width = rgb_tensor.dim_size(2);
const int num_pixels = height * width;
auto rgb = rgb_tensor.flat<float>();
// Fills `output_kernel` with the spatial feature vector of every voxel of a
// 3D volume, for use as the position array of the (spatial-only) filter.
//
// Voxels are enumerated with x varying fastest, then y, then z, i.e. the
// flattened index of voxel (x, y, z) is p = (z * height + y) * width + x.
// For each voxel three consecutive floats are written:
//   output_kernel[3 * p]     = x / theta_gamma
//   output_kernel[3 * p + 1] = y / theta_gamma
//   output_kernel[3 * p + 2] = z / theta_gamma_z
//
// `output_kernel` must have room for 3 * width * height * depth floats.
//
// Note: the y feature must be the row index *within* the current slice.
// Deriving it as `p / width` from the flattened index (as in the 2D case)
// yields z * height + y, which is only correct for the first slice, so the
// coordinates are iterated explicitly here.
void compute_spatial_kernel_3d(float * const output_kernel,
                               const int width,
                               const int height,
                               const int depth,
                               const float theta_gamma,
                               const float theta_gamma_z) {
  float *out = output_kernel;
  for (int z = 0; z < depth; ++z) {
    const float feat_z = static_cast<float>(z) / theta_gamma_z;
    for (int y = 0; y < height; ++y) {
      const float feat_y = static_cast<float>(y) / theta_gamma;
      for (int x = 0; x < width; ++x) {
        *out++ = static_cast<float>(x) / theta_gamma;
        *out++ = feat_y;
        *out++ = feat_z;
      }
    }
  }
}

for (int p = 0; p < num_pixels; ++p) {
// Spatial terms
output_kernel[5 * p] = static_cast<float>(p % width) / theta_alpha;
output_kernel[5 * p + 1] = static_cast<float>(p / width) / theta_alpha;
// Builds the per-pixel feature vectors for the bilateral (appearance) filter
// of a 2D image.
//
// `image_tensor` is read as (channels, height, width) via dim_size(0..2) and
// accessed through its flattened view, one full plane per channel.
// For pixel p = y * width + x the following (channels + 2) floats are written
// consecutively to `output_kernel`:
//   [x / theta_alpha, y / theta_alpha, c_0 / theta_beta, ..., c_{n-1} / theta_beta]
// where c_i is the value of channel i at that pixel.
//
// `output_kernel` must have room for (channels + 2) * height * width floats.
void compute_bilateral_kernel(float * const output_kernel,
                              const Tensor& image_tensor,
                              const float theta_alpha,
                              const float theta_beta) {
  const int image_channels = image_tensor.dim_size(0);
  const int rows = image_tensor.dim_size(1);
  const int cols = image_tensor.dim_size(2);
  const int plane_size = rows * cols;
  const auto values = image_tensor.flat<float>();

  float *out = output_kernel;
  int pixel = 0;
  for (int y = 0; y < rows; ++y) {
    for (int x = 0; x < cols; ++x, ++pixel) {
      // Two spatial features first ...
      *out++ = static_cast<float>(x) / theta_alpha;
      *out++ = static_cast<float>(y) / theta_alpha;

      // ... followed by one appearance feature per image channel.
      for (int c = 0; c < image_channels; ++c) {
        *out++ = static_cast<float>(values(pixel + c * plane_size) / theta_beta);
      }
    }
  }
}

// Color terms
output_kernel[5 * p + 2] = static_cast<float>(rgb(p) / theta_beta);
output_kernel[5 * p + 3] = static_cast<float>(rgb(num_pixels + p) / theta_beta);
output_kernel[5 * p + 4] = static_cast<float>(rgb(2 * num_pixels + p) / theta_beta);
}
// Builds the per-voxel feature vectors for the bilateral (appearance) filter
// of a 3D volume.
//
// `image_tensor` is read as (channels, depth, height, width) via
// dim_size(0..3) and accessed through its flattened view, one full volume per
// channel. For voxel p = (z * height + y) * width + x the following
// (channels + 3) floats are written consecutively to `output_kernel`:
//   [x / theta_alpha, y / theta_alpha, z / theta_alpha_z,
//    c_0 / theta_beta, ..., c_{n-1} / theta_beta]
// where c_i is the value of channel i at that voxel.
//
// `output_kernel` must have room for (channels + 3) * depth * height * width
// floats.
//
// Note: the y feature must be the row index *within* the current slice.
// Deriving it as `p / width` from the flattened index yields z * height + y,
// which is only correct for the first slice, so the coordinates are iterated
// explicitly here.
void compute_bilateral_kernel_3d(float * const output_kernel,
                                 const Tensor& image_tensor,
                                 const float theta_alpha,
                                 const float theta_alpha_z,
                                 const float theta_beta) {
  const int image_channels = image_tensor.dim_size(0);
  const int depth = image_tensor.dim_size(1);
  const int height = image_tensor.dim_size(2);
  const int width = image_tensor.dim_size(3);
  const int num_voxels = depth * height * width;

  const auto values = image_tensor.flat<float>();

  float *out = output_kernel;
  int p = 0;  // flattened voxel index, kept in step with (x, y, z)
  for (int z = 0; z < depth; ++z) {
    const float feat_z = static_cast<float>(z) / theta_alpha_z;
    for (int y = 0; y < height; ++y) {
      const float feat_y = static_cast<float>(y) / theta_alpha;
      for (int x = 0; x < width; ++x, ++p) {
        // Spatial terms
        *out++ = static_cast<float>(x) / theta_alpha;
        *out++ = feat_y;
        *out++ = feat_z;

        // Appearance terms: channel i of voxel p lives at p + i * num_voxels.
        for (int i = 0; i < image_channels; ++i) {
          *out++ = static_cast<float>(values(p + i * num_voxels)) / theta_beta;
        }
      }
    }
  }
}

REGISTER_OP("HighDimFilter")
.Attr("T: {float}")
.Attr("bilateral: bool")
.Attr("theta_alpha: float = 1.0")
.Attr("theta_alpha_z: float = 1.0")
.Attr("theta_beta: float = 1.0")
.Attr("theta_gamma: float = 1.0")
.Attr("theta_gamma_z: float = 1.0")
.Attr("backwards: bool = false")
.Input("raw: float32")
.Input("rgb: float32")
.Output("filtered: float32")
.Input("raw: T")
.Input("rgb: T")
.Output("filtered: T")
.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
c->set_output(0, c->input(0));
return Status::OK();
Expand All @@ -82,46 +136,76 @@ class HighDimFilterOp : public OpKernel {
context->GetAttr("bilateral", &bilateral_));
OP_REQUIRES_OK(context,
context->GetAttr("theta_alpha", &theta_alpha_));
OP_REQUIRES_OK(context,
context->GetAttr("theta_alpha_z", &theta_alpha_z_));
OP_REQUIRES_OK(context,
context->GetAttr("theta_beta", &theta_beta_));
OP_REQUIRES_OK(context,
context->GetAttr("theta_gamma", &theta_gamma_));
OP_REQUIRES_OK(context,
context->GetAttr("theta_gamma_z", &theta_gamma_z_));
OP_REQUIRES_OK(context,
context->GetAttr("backwards", &backwards_));
}

void Compute(OpKernelContext* context) override {

// Grab the unary tensor
const Tensor& input_tensor = context->input(0);
const Tensor& unary_tensor = context->input(0);
// Grab the RGB image tensor
const Tensor& image_tensor = context->input(1);

const int channels = input_tensor.dim_size(0);
const int height = input_tensor.dim_size(1);
const int width = input_tensor.dim_size(2);
const int num_pixels = width * height;

const int spatial_dims = image_tensor.dims() - 1;
const bool is_3d = spatial_dims == 3;

const int image_channels = image_tensor.dim_size(0);
const int bilateral_channels = image_channels + spatial_dims;
const int unary_channels = unary_tensor.dim_size(0);
const int depth = is_3d ? image_tensor.dim_size(1) : 1;
const int height = image_tensor.dim_size(spatial_dims - 1);
const int width = image_tensor.dim_size(spatial_dims);
const int num_pixels = width * height * depth;

// Create the output tensor
Tensor* output_tensor = NULL;
OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
OP_REQUIRES_OK(context, context->allocate_output(0, unary_tensor.shape(),
&output_tensor));
ModifiedPermutohedral mp;

if (bilateral_) {
float * const kernel_vals = new float[5 * num_pixels];
compute_bilateral_kernel(kernel_vals, image_tensor,
theta_alpha_, theta_beta_);
mp.init(kernel_vals, 5, num_pixels);
mp.compute(*output_tensor, input_tensor, channels, backwards_);

float * const kernel_vals = new float[bilateral_channels * num_pixels];
if (is_3d) {
compute_bilateral_kernel_3d(kernel_vals,
image_tensor,
theta_alpha_,
theta_alpha_z_,
theta_beta_);
} else {
compute_bilateral_kernel(kernel_vals,
image_tensor,
theta_alpha_,
theta_beta_);
}
mp.init(kernel_vals, bilateral_channels, num_pixels);
mp.compute(*output_tensor, unary_tensor, unary_channels, backwards_);
delete[] kernel_vals;
} else {
float * const kernel_vals = new float[2 * num_pixels];
compute_spatial_kernel(kernel_vals, width, height, theta_gamma_);
mp.init(kernel_vals, 2, num_pixels);
mp.compute(*output_tensor, input_tensor, channels, backwards_);

float * const kernel_vals = new float[spatial_dims * num_pixels];
if (is_3d) {
compute_spatial_kernel_3d(kernel_vals,
width,
height,
depth,
theta_gamma_,
theta_gamma_z_);
} else {
compute_spatial_kernel(kernel_vals,
width,
height,
theta_gamma_);
}
mp.init(kernel_vals, spatial_dims, num_pixels);
mp.compute(*output_tensor, unary_tensor, unary_channels, backwards_);
delete[] kernel_vals;
}

Expand All @@ -130,8 +214,10 @@ class HighDimFilterOp : public OpKernel {
private:
bool bilateral_;
float theta_alpha_;
float theta_alpha_z_;
float theta_beta_;
float theta_gamma_;
float theta_gamma_z_;
bool backwards_;
};

Expand Down
Empty file modified src/cpp/modified_permutohedral.cc
100644 → 100755
Empty file.
Empty file modified src/cpp/modified_permutohedral.h
100644 → 100755
Empty file.
Loading