From c7a63a167b925572429d267134a7f7bfaed71111 Mon Sep 17 00:00:00 2001
From: WRQ <59SaPhire95@gmail.com>
Date: Mon, 11 Jun 2018 13:28:02 +0800
Subject: [PATCH 1/7] update

---
 pytorch_binding/setup.py                    |  3 ++-
 pytorch_binding/src/binding.cpp             |  7 ++++++-
 pytorch_binding/warpctc_pytorch/__init__.py | 16 ++++++----------
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/pytorch_binding/setup.py b/pytorch_binding/setup.py
index a63c3ea..9a97b87 100644
--- a/pytorch_binding/setup.py
+++ b/pytorch_binding/setup.py
@@ -7,7 +7,8 @@ from torch.utils.ffi import create_extension
 
 import torch
 
-extra_compile_args = ['-std=c++11', '-fPIC']
+#extra_compile_args = ['-std=c++11', '-fPIC']
+extra_compile_args = ['-std=c99', '-fPIC']
 warp_ctc_path = "../build"
 
 if torch.cuda.is_available() or "CUDA_HOME" in os.environ:
diff --git a/pytorch_binding/src/binding.cpp b/pytorch_binding/src/binding.cpp
index ca80dfc..cc338a9 100644
--- a/pytorch_binding/src/binding.cpp
+++ b/pytorch_binding/src/binding.cpp
@@ -20,6 +20,8 @@ extern "C" int cpu_ctc(THFloatTensor *probs,
                        THIntTensor *label_sizes,
                        THIntTensor *sizes,
                        int minibatch_size,
+                       int blanklabel_index,
+                       int num_threads,
                        THFloatTensor *costs)
 {
     float *probs_ptr = probs->storage->data + probs->storageOffset;
@@ -38,7 +40,8 @@ extern "C" int cpu_ctc(THFloatTensor *probs,
     ctcOptions options;
     memset(&options, 0, sizeof(options));
     options.loc = CTC_CPU;
-    options.num_threads = 0; // will use default number of threads
+    options.blank_label = blanklabel_index;
+    options.num_threads = num_threads; // will use the given number of threads
 
 #if defined(CTC_DISABLE_OMP) || defined(APPLE)
     // have to use at least one
@@ -68,6 +71,7 @@ extern "C" int cpu_ctc(THFloatTensor *probs,
                        THIntTensor *label_sizes,
                        THIntTensor *sizes,
                        int minibatch_size,
+                       int blanklabel_index,
                        THFloatTensor *costs)
 {
     float *probs_ptr = probs->storage->data + probs->storageOffset;
@@ -86,6 +90,7 @@ extern "C" int cpu_ctc(THFloatTensor *probs,
     ctcOptions options;
     memset(&options, 0, sizeof(options));
     options.loc = CTC_GPU;
+    options.blank_label = blanklabel_index;
     options.stream = THCState_getCurrentStream(state);
 
     size_t gpu_size_bytes;
diff --git a/pytorch_binding/warpctc_pytorch/__init__.py b/pytorch_binding/warpctc_pytorch/__init__.py
index 5f7cf74..347b794 100644
--- a/pytorch_binding/warpctc_pytorch/__init__.py
+++ b/pytorch_binding/warpctc_pytorch/__init__.py
@@ -9,22 +9,18 @@
 
 class _CTC(Function):
     @staticmethod
-    def forward(ctx, acts, labels, act_lens, label_lens, size_average=False,
+    def forward(ctx, acts, labels, act_lens, label_lens, blank_label=0, num_threads=0, size_average=False,
                 length_average=False):
         is_cuda = True if acts.is_cuda else False
         acts = acts.contiguous()
-        loss_func = warp_ctc.gpu_ctc if is_cuda else warp_ctc.cpu_ctc
         grads = torch.zeros(acts.size()).type_as(acts)
         minibatch_size = acts.size(1)
         costs = torch.zeros(minibatch_size).cpu()
-        loss_func(acts,
-                  grads,
-                  labels,
-                  label_lens,
-                  act_lens,
-                  minibatch_size,
-                  costs)
-
+        if is_cuda:
+            # num_threads is ignored in GPU mode
+            warp_ctc.gpu_ctc(acts, grads, labels, label_lens, act_lens, minibatch_size, blank_label, costs)
+        else:
+            warp_ctc.cpu_ctc(acts, grads, labels, label_lens, act_lens, minibatch_size, blank_label, num_threads, costs)
         costs = torch.FloatTensor([costs.sum()])
 
         if length_average:

From 96023b40a5ecbae8e7ca580ac824375933696c44 Mon Sep 17 00:00:00 2001
From: 59SaPhire95 <40117232+59SaPhire95@users.noreply.github.com>
Date: Mon, 11 Jun 2018 15:29:27 +0800
Subject: [PATCH 2/7] Update test_cpu.py

---
 pytorch_binding/tests/test_cpu.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pytorch_binding/tests/test_cpu.py b/pytorch_binding/tests/test_cpu.py
index efc4d15..c7aac6e 100755
--- a/pytorch_binding/tests/test_cpu.py
+++ b/pytorch_binding/tests/test_cpu.py
@@ -17,6 +17,8 @@ def test_simple():
             label_sizes,
             sizes,
             minibatch_size,
+            blank_label=0,
+            num_threads=0,
             costs)
     print('CPU_cost: %f' % costs.sum())
 
@@ -40,6 +42,8 @@ def test_medium(multiplier):
             label_sizes,
             sizes,
             minibatch_size,
+            blank_label=0,
+            num_threads=0,
             costs)
     print('CPU_cost: %f' % costs.sum())
 
@@ -62,6 +66,8 @@ def test_empty_label():
             label_sizes,
             sizes,
             minibatch_size,
+            blank_label=0,
+            num_threads=0,
             costs)
     print('CPU_cost: %f' % costs.sum())
 

From c9c68a88ee1c10f0327dbfa9b80fcc3717c5bf21 Mon Sep 17 00:00:00 2001
From: 59SaPhire95 <40117232+59SaPhire95@users.noreply.github.com>
Date: Mon, 11 Jun 2018 15:30:40 +0800
Subject: [PATCH 3/7] Update test_gpu.py

---
 pytorch_binding/tests/test_gpu.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pytorch_binding/tests/test_gpu.py b/pytorch_binding/tests/test_gpu.py
index 9369ac7..c6580d2 100755
--- a/pytorch_binding/tests/test_gpu.py
+++ b/pytorch_binding/tests/test_gpu.py
@@ -18,6 +18,8 @@ def test_simple():
             label_sizes,
             sizes,
             minibatch_size,
+            blank_label=0,
+            num_threads=0,
             costs)
     print('CPU_cost: %f' % costs.sum())
     probs = probs.clone().cuda()
@@ -29,6 +31,7 @@ def test_simple():
             label_sizes,
             sizes,
             minibatch_size,
+            blank_label=0,
             costs)
     print('GPU_cost: %f' % costs.sum())
     print(grads.view(grads.size(0) * grads.size(1), grads.size(2)))
@@ -54,6 +57,8 @@ def test_medium(multiplier):
             label_sizes,
             sizes,
             minibatch_size,
+            blank_label=0,
+            num_threads=0,
             costs)
     print('CPU_cost: %f' % costs.sum())
     probs = probs.clone().cuda()
@@ -89,6 +94,8 @@ def test_empty_label():
             label_sizes,
             sizes,
             minibatch_size,
+            blank_label=0,
+            num_threads=0,
             costs)
     print('CPU_cost: %f' % costs.sum())
     probs = probs.clone().cuda()
@@ -100,6 +107,7 @@ def test_empty_label():
             label_sizes,
             sizes,
             minibatch_size,
+            blank_label=0,
             costs)
     print('GPU_cost: %f' % costs.sum())
     print(grads.view(grads.size(0) * grads.size(1), grads.size(2)))

From dbde83a32f75be930805970452bedbf096ab31e8 Mon Sep 17 00:00:00 2001
From: 59SaPhire95 <40117232+59SaPhire95@users.noreply.github.com>
Date: Mon, 11 Jun 2018 15:55:52 +0800
Subject: [PATCH 4/7] Update test_cpu.py

---
 pytorch_binding/tests/test_cpu.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pytorch_binding/tests/test_cpu.py b/pytorch_binding/tests/test_cpu.py
index c7aac6e..a3dccf9 100755
--- a/pytorch_binding/tests/test_cpu.py
+++ b/pytorch_binding/tests/test_cpu.py
@@ -19,7 +19,7 @@ def test_simple():
             minibatch_size,
             blank_label=0,
             num_threads=0,
-            costs)
+            costs=costs)
     print('CPU_cost: %f' % costs.sum())
 
 
@@ -44,7 +44,7 @@ def test_medium(multiplier):
             minibatch_size,
             blank_label=0,
             num_threads=0,
-            costs)
+            costs=costs)
     print('CPU_cost: %f' % costs.sum())
 
 
@@ -68,7 +68,7 @@ def test_empty_label():
             minibatch_size,
             blank_label=0,
             num_threads=0,
-            costs)
+            costs=costs)
     print('CPU_cost: %f' % costs.sum())
 
 
From 315bb2dcb89fc9e21e333224ad155d9f3e1215fc Mon Sep 17 00:00:00 2001
From: 59SaPhire95 <40117232+59SaPhire95@users.noreply.github.com>
Date: Mon, 11 Jun 2018 15:56:44 +0800
Subject: [PATCH 5/7] Update test_gpu.py

---
 pytorch_binding/tests/test_gpu.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pytorch_binding/tests/test_gpu.py b/pytorch_binding/tests/test_gpu.py
index c6580d2..ac91aa6 100755
--- a/pytorch_binding/tests/test_gpu.py
+++ b/pytorch_binding/tests/test_gpu.py
@@ -20,7 +20,7 @@ def test_simple():
             minibatch_size,
             blank_label=0,
             num_threads=0,
-            costs)
+            costs=costs)
     print('CPU_cost: %f' % costs.sum())
     probs = probs.clone().cuda()
     grads = torch.zeros(probs.size()).cuda()
@@ -32,7 +32,7 @@ def test_simple():
             sizes,
             minibatch_size,
             blank_label=0,
-            costs)
+            costs=costs)
     print('GPU_cost: %f' % costs.sum())
     print(grads.view(grads.size(0) * grads.size(1), grads.size(2)))
 
@@ -59,7 +59,7 @@ def test_medium(multiplier):
             minibatch_size,
             blank_label=0,
             num_threads=0,
-            costs)
+            costs=costs)
     print('CPU_cost: %f' % costs.sum())
     probs = probs.clone().cuda()
     grads = torch.zeros(probs.size()).cuda()
@@ -70,7 +70,7 @@ def test_medium(multiplier):
             label_sizes,
             sizes,
             minibatch_size,
-            costs)
+            costs=costs)
     print('GPU_cost: %f' % costs.sum())
     print(grads.view(grads.size(0) * grads.size(1), grads.size(2)))
 
@@ -96,7 +96,7 @@ def test_empty_label():
             minibatch_size,
             blank_label=0,
             num_threads=0,
-            costs)
+            costs=costs)
     print('CPU_cost: %f' % costs.sum())
     probs = probs.clone().cuda()
     grads = torch.zeros(probs.size()).cuda()
@@ -108,7 +108,7 @@ def test_empty_label():
             sizes,
             minibatch_size,
             blank_label=0,
-            costs)
+            costs=costs)
     print('GPU_cost: %f' % costs.sum())
     print(grads.view(grads.size(0) * grads.size(1), grads.size(2)))

From 982ba1c41e3b9a0f3a45e442ae6b64d0c5f61ff0 Mon Sep 17 00:00:00 2001
From: 59SaPhire95 <40117232+59SaPhire95@users.noreply.github.com>
Date: Mon, 11 Jun 2018 16:23:31 +0800
Subject: [PATCH 6/7] Update test_cpu.py

---
 pytorch_binding/tests/test_cpu.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/pytorch_binding/tests/test_cpu.py b/pytorch_binding/tests/test_cpu.py
index a3dccf9..516ee1d 100755
--- a/pytorch_binding/tests/test_cpu.py
+++ b/pytorch_binding/tests/test_cpu.py
@@ -17,9 +17,9 @@ def test_simple():
             label_sizes,
             sizes,
             minibatch_size,
-            blank_label=0,
-            num_threads=0,
-            costs=costs)
+            0,
+            0,
+            costs)
     print('CPU_cost: %f' % costs.sum())
 
 
@@ -42,9 +42,9 @@ def test_medium(multiplier):
             label_sizes,
             sizes,
             minibatch_size,
-            blank_label=0,
-            num_threads=0,
-            costs=costs)
+            0,
+            0,
+            costs)
     print('CPU_cost: %f' % costs.sum())
 
 
@@ -66,9 +66,9 @@ def test_empty_label():
             label_sizes,
             sizes,
             minibatch_size,
-            blank_label=0,
-            num_threads=0,
-            costs=costs)
+            0,
+            0,
+            costs)
     print('CPU_cost: %f' % costs.sum())
 
 
From 095143c5dfe62a4dd9701c76d1138a9ea6593c15 Mon Sep 17 00:00:00 2001
From: 59SaPhire95 <40117232+59SaPhire95@users.noreply.github.com>
Date: Mon, 11 Jun 2018 16:24:39 +0800
Subject: [PATCH 7/7] Update test_gpu.py

---
 pytorch_binding/tests/test_gpu.py | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/pytorch_binding/tests/test_gpu.py b/pytorch_binding/tests/test_gpu.py
index ac91aa6..d63cc5d 100755
--- a/pytorch_binding/tests/test_gpu.py
+++ b/pytorch_binding/tests/test_gpu.py
@@ -18,9 +18,9 @@ def test_simple():
             label_sizes,
             sizes,
             minibatch_size,
-            blank_label=0,
-            num_threads=0,
-            costs=costs)
+            0,
+            0,
+            costs)
     print('CPU_cost: %f' % costs.sum())
     probs = probs.clone().cuda()
     grads = torch.zeros(probs.size()).cuda()
@@ -31,8 +31,8 @@ def test_simple():
             label_sizes,
             sizes,
             minibatch_size,
-            blank_label=0,
-            costs=costs)
+            0,
+            costs)
     print('GPU_cost: %f' % costs.sum())
     print(grads.view(grads.size(0) * grads.size(1), grads.size(2)))
 
@@ -57,9 +57,9 @@ def test_medium(multiplier):
             label_sizes,
             sizes,
             minibatch_size,
-            blank_label=0,
-            num_threads=0,
-            costs=costs)
+            0,
+            0,
+            costs)
     print('CPU_cost: %f' % costs.sum())
     probs = probs.clone().cuda()
     grads = torch.zeros(probs.size()).cuda()
@@ -70,7 +70,8 @@ def test_medium(multiplier):
             label_sizes,
             sizes,
             minibatch_size,
-            costs=costs)
+            0,
+            costs)
     print('GPU_cost: %f' % costs.sum())
     print(grads.view(grads.size(0) * grads.size(1), grads.size(2)))
 
@@ -94,9 +95,9 @@ def test_empty_label():
             label_sizes,
             sizes,
             minibatch_size,
-            blank_label=0,
-            num_threads=0,
-            costs=costs)
+            0,
+            0,
+            costs)
     print('CPU_cost: %f' % costs.sum())
     probs = probs.clone().cuda()
     grads = torch.zeros(probs.size()).cuda()
@@ -107,8 +108,8 @@ def test_empty_label():
             label_sizes,
             sizes,
             minibatch_size,
-            blank_label=0,
-            costs=costs)
+            0,
+            costs)
     print('GPU_cost: %f' % costs.sum())
     print(grads.view(grads.size(0) * grads.size(1), grads.size(2)))
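
For reviewers, a minimal usage sketch of the CPU entry point once this series is applied. It mirrors pytorch_binding/tests/test_cpu.py as of PATCH 6/7; the import line and the tensor setup below are illustrative assumptions (they sit outside the hunks above), and the two extra positional arguments are the blank_label and num_threads parameters added to binding.cpp in PATCH 1/7, where 0 keeps the previous defaults.

    import torch
    from warpctc_pytorch import cpu_ctc  # assumed import path; match whatever the existing tests use

    # Hypothetical activations: T=2 frames, minibatch of 1, alphabet of 5 symbols.
    probs = torch.FloatTensor([[[0.1, 0.6, 0.1, 0.1, 0.1]],
                               [[0.1, 0.1, 0.6, 0.1, 0.1]]]).contiguous()
    grads = torch.zeros(probs.size())       # filled with the gradients by the call
    labels = torch.IntTensor([1, 2])        # concatenated label sequences
    label_sizes = torch.IntTensor([2])      # one sequence of length 2
    sizes = torch.IntTensor([2])            # frames per utterance
    minibatch_size = probs.size(1)
    costs = torch.zeros(minibatch_size)     # filled with the per-utterance CTC cost

    cpu_ctc(probs,
            grads,
            labels,
            label_sizes,
            sizes,
            minibatch_size,
            0,       # blank_label: index of the CTC blank symbol (new in this series)
            0,       # num_threads: 0 lets warp-ctc pick its default (new in this series)
            costs)
    print('CPU_cost: %f' % costs.sum())

The GPU entry point (gpu_ctc) takes the same argument list without num_threads, matching binding.cpp in PATCH 1/7 and tests/test_gpu.py in PATCH 7/7.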