[Type Casting Pass] Add support for fp64->fp32 conversion

alibaba · Nov 30, 2021 · 0750995 · 0750995
1 parent edf5770
commit 0750995
Show file tree

Hide file tree

Showing 7 changed files with 66 additions and 44 deletions.
diff --git a/include/halo/lib/transforms/typecast.h b/include/halo/lib/transforms/typecast.h
@@ -25,7 +25,7 @@ namespace halo {
 /// This pass eliminates usage of int64 by casting down.
 class TypeCast final : public FunctionPass {
  public:
-  TypeCast() : FunctionPass("Cast down int64 to int32") {}
+  TypeCast() : FunctionPass("Cast down int64/fp64 to int32/fp32") {}
 
   bool RunOnFunction(Function* func) override;
 };

diff --git a/lib/transforms/typecast.cc b/lib/transforms/typecast.cc
@@ -23,6 +23,22 @@
 
 namespace halo {
 
+/// Casts `c` in place from Tsrc to Tdst; returns false if it is not src_type.
+template <typename Tsrc, typename Tdst>
+static bool ReplaceConstant(Constant* c, DataType src_type, DataType new_type) {
+  const auto& orig_type = c->GetResultType(0);
+  if (orig_type.GetDataType() != src_type) {
+    return false;
+  }
+  std::vector<Tdst> ret(orig_type.GetTotalNumOfElements());
+  for (unsigned int i = 0; i < ret.size(); ++i) {
+    // Cast straight to Tdst: a float detour rounds int64 values above 2^24.
+    ret[i] = static_cast<Tdst>(c->GetData<Tsrc>(i));
+  }
+  c->SetData(halo::Type{new_type, orig_type.GetDimSizes()}, ret.data());
+  return true;
+}
+
 bool TypeCast::RunOnFunction(Function* func) {
   bool changed = false;
   // Replace arguments.
@@ -32,25 +48,27 @@ bool TypeCast::RunOnFunction(Function* func) {
       halo::Type new_ty{DataType::INT32, ty.GetDimSizes()};
       arg->SetType(new_ty);
       changed |= true;
+    } else if (ty.GetDataType() == DataType::FLOAT64) {
+      halo::Type new_ty{DataType::FLOAT32, ty.GetDimSizes()};
+      arg->SetType(new_ty);
+      changed |= true;
     }
   }
+
   // Replace constants.
-  ConstantBuilder cb(func);
-  Function::ConstantList& constants = func->Constants();
-  for (auto it = constants.begin(), ie = constants.end(); it != ie; ++it) {
-    const auto& orig_type = (*it)->GetResultType(0);
-    if (orig_type.GetDataType() == DataType::INT64) {
-      std::vector<int> ret;
-      ret.reserve(orig_type.GetTotalNumOfElements());
-      for (unsigned int i = 0; i < orig_type.GetTotalNumOfElements(); ++i) {
-        ret.push_back(static_cast<int>((*it)->GetData<int64_t>(i)));
-      }
-      Constant* c_ret = cb.CreateConstant(
-          (*it)->GetName() + "_castdown",
-          halo::Type{DataType::INT32, orig_type.GetDimSizes()}, ret.data());
-      (*it)->ReplaceAllUsesWith(0, *c_ret);
-      changed = true;
-    }
+  Module* m = func->GetParent();
+  for (auto& c : m->Constants()) {
+    changed |= ReplaceConstant<double, float>(c.get(), DataType::FLOAT64,
+                                              DataType::FLOAT32);
+    changed |= ReplaceConstant<int64_t, int32_t>(c.get(), DataType::INT64,
+                                                 DataType::INT32);
+  }
+
+  for (auto& c : func->Constants()) {
+    changed |= ReplaceConstant<double, float>(c.get(), DataType::FLOAT64,
+                                              DataType::FLOAT32);
+    changed |= ReplaceConstant<int64_t, int32_t>(c.get(), DataType::INT64,
+                                                 DataType::INT32);
   }
 
   for (auto& bb : *func) {
@@ -60,11 +78,15 @@ bool TypeCast::RunOnFunction(Function* func) {
         if (orig_type.IsValid() && orig_type.GetDataType() == DataType::INT64) {
           inst->GetResultsTypes()[i] =
               halo::Type{DataType::INT32, orig_type.GetDimSizes()};
+        } else if (orig_type.IsValid() &&
+                   orig_type.GetDataType() == DataType::FLOAT64) {
+          inst->GetResultsTypes()[i] =
+              halo::Type{DataType::FLOAT32, orig_type.GetDimSizes()};
         }
       }
     }
   }
   return changed;
-} // namespace halo
+}
 
 } // end namespace halo
diff --git a/tests/unittests/lit_cases/test_tensorrt/test_einsum_batch_diagonal_tensorrt.cc b/tests/unittests/lit_cases/test_tensorrt/test_einsum_batch_diagonal_tensorrt.cc
@@ -17,12 +17,12 @@
 
 // clang-format off
 // Testing CXX Code Gen using ODLA API on tensorrt
-// RUN: %halo_compiler -target cxx -o %data_path/test_einsum_batch_diagonal/test_data_set_0/output_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_batch_diagonal/test_data_set_0/output_0.pb
-// RUN: %halo_compiler -target cxx -o %data_path/test_einsum_batch_diagonal/test_data_set_0/input_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_batch_diagonal/test_data_set_0/input_0.pb
-// RUN: %halo_compiler -target cxx -batch-size 1 %halo_compile_flags %data_path/test_einsum_batch_diagonal/model.onnx -o %t.cc
+// RUN: %halo_compiler --disable-type-cast=false -target cxx -o %data_path/test_einsum_batch_diagonal/test_data_set_0/output_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_batch_diagonal/test_data_set_0/output_0.pb
+// RUN: %halo_compiler --disable-type-cast=false -target cxx -o %data_path/test_einsum_batch_diagonal/test_data_set_0/input_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_batch_diagonal/test_data_set_0/input_0.pb
+// RUN: %halo_compiler --disable-type-cast=false -target cxx -batch-size 1 %halo_compile_flags %data_path/test_einsum_batch_diagonal/model.onnx -o %t.cc
 // RUN: %cxx -c -fPIC -o %t.o %t.cc -I%odla_path/include
 // RUN: %cxx -g %s %t.o %t.bin -I%T -I%odla_path/include -I%unittests_path -I%data_path/test_einsum_batch_diagonal/test_data_set_0 %odla_link %device_link -lodla_tensorrt -o %t_tensorrt.exe -Wno-deprecated-declarations
-// RUN: %t_tensorrt.exe 0.0001 0 tensorrt %data_path/test_einsum_batch_diagonal | FileCheck %s
+// RUN: ODLA_TRT_USE_EXPLICIT_BATCH=1 %t_tensorrt.exe 0.0001 0 tensorrt %data_path/test_einsum_batch_diagonal | FileCheck %s
 // CHECK: Result Pass
 // clang-format on
 // XFAIL: *

diff --git a/tests/unittests/lit_cases/test_tensorrt/test_einsum_batch_matmul_tensorrt.cc b/tests/unittests/lit_cases/test_tensorrt/test_einsum_batch_matmul_tensorrt.cc
@@ -17,14 +17,14 @@
 
 // clang-format off
 // Testing CXX Code Gen using ODLA API on tensorrt
-// RUN: %halo_compiler -target cxx -o %data_path/test_einsum_batch_matmul/test_data_set_0/input_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_batch_matmul/test_data_set_0/input_0.pb
-// RUN: %halo_compiler -target cxx -o %data_path/test_einsum_batch_matmul/test_data_set_0/output_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_batch_matmul/test_data_set_0/output_0.pb
-// RUN: %halo_compiler -target cxx -o %data_path/test_einsum_batch_matmul/test_data_set_0/input_1.cc -x onnx -emit-data-as-c %data_path/test_einsum_batch_matmul/test_data_set_0/input_1.pb
-// RUN: %halo_compiler -target cxx -batch-size 1 %halo_compile_flags %data_path/test_einsum_batch_matmul/model.onnx -o %t.cc
+// RUN: %halo_compiler --disable-type-cast=false -target cxx -o %data_path/test_einsum_batch_matmul/test_data_set_0/input_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_batch_matmul/test_data_set_0/input_0.pb
+// RUN: %halo_compiler --disable-type-cast=false -target cxx -o %data_path/test_einsum_batch_matmul/test_data_set_0/output_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_batch_matmul/test_data_set_0/output_0.pb
+// RUN: %halo_compiler --disable-type-cast=false -target cxx -o %data_path/test_einsum_batch_matmul/test_data_set_0/input_1.cc -x onnx -emit-data-as-c %data_path/test_einsum_batch_matmul/test_data_set_0/input_1.pb
+// RUN: %halo_compiler --disable-type-cast=false -target cxx -batch-size 1 %halo_compile_flags %data_path/test_einsum_batch_matmul/model.onnx -o %t.cc
 // RUN: %cxx -c -fPIC -o %t.o %t.cc -I%odla_path/include
 // RUN: %cxx -g %s %t.o %t.bin -I%T -I%odla_path/include -I%unittests_path -I%data_path/test_einsum_batch_matmul/test_data_set_0 %odla_link %device_link -lodla_tensorrt -o %t_tensorrt.exe -Wno-deprecated-declarations
-// RUN: %t_tensorrt.exe 0.0001 0 tensorrt %data_path/test_einsum_batch_matmul | FileCheck %s
+// RUN: ODLA_TRT_USE_EXPLICIT_BATCH=1 %t_tensorrt.exe 0.0001 0 tensorrt %data_path/test_einsum_batch_matmul | FileCheck %s
 // CHECK: Result Pass
 // clang-format on
-// XFAIL: *
+
 #include "test_einsum_batch_matmul_tensorrt.cc.tmp.main.cc.in"
diff --git a/tests/unittests/lit_cases/test_tensorrt/test_einsum_inner_prod_tensorrt.cc b/tests/unittests/lit_cases/test_tensorrt/test_einsum_inner_prod_tensorrt.cc
@@ -17,14 +17,14 @@
 
 // clang-format off
 // Testing CXX Code Gen using ODLA API on tensorrt
-// RUN: %halo_compiler -target cxx -o %data_path/test_einsum_inner_prod/test_data_set_0/input_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_inner_prod/test_data_set_0/input_0.pb
-// RUN: %halo_compiler -target cxx -o %data_path/test_einsum_inner_prod/test_data_set_0/output_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_inner_prod/test_data_set_0/output_0.pb
-// RUN: %halo_compiler -target cxx -o %data_path/test_einsum_inner_prod/test_data_set_0/input_1.cc -x onnx -emit-data-as-c %data_path/test_einsum_inner_prod/test_data_set_0/input_1.pb
-// RUN: %halo_compiler -target cxx -batch-size 1 %halo_compile_flags %data_path/test_einsum_inner_prod/model.onnx -o %t.cc
+// RUN: %halo_compiler --disable-type-cast=false -target cxx -o %data_path/test_einsum_inner_prod/test_data_set_0/input_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_inner_prod/test_data_set_0/input_0.pb
+// RUN: %halo_compiler --disable-type-cast=false -target cxx -o %data_path/test_einsum_inner_prod/test_data_set_0/output_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_inner_prod/test_data_set_0/output_0.pb
+// RUN: %halo_compiler --disable-type-cast=false -target cxx -o %data_path/test_einsum_inner_prod/test_data_set_0/input_1.cc -x onnx -emit-data-as-c %data_path/test_einsum_inner_prod/test_data_set_0/input_1.pb
+// RUN: %halo_compiler --disable-type-cast=false -target cxx -batch-size 1 %halo_compile_flags %data_path/test_einsum_inner_prod/model.onnx -o %t.cc
 // RUN: %cxx -c -fPIC -o %t.o %t.cc -I%odla_path/include
 // RUN: %cxx -g %s %t.o %t.bin -I%T -I%odla_path/include -I%unittests_path -I%data_path/test_einsum_inner_prod/test_data_set_0 %odla_link %device_link -lodla_tensorrt -o %t_tensorrt.exe -Wno-deprecated-declarations
-// RUN: %t_tensorrt.exe 0.0001 0 tensorrt %data_path/test_einsum_inner_prod | FileCheck %s
+// RUN: ODLA_TRT_USE_EXPLICIT_BATCH=1 %t_tensorrt.exe 0.0001 0 tensorrt %data_path/test_einsum_inner_prod | FileCheck %s
 // CHECK: Result Pass
 // clang-format on
-// XFAIL: *
+
 #include "test_einsum_inner_prod_tensorrt.cc.tmp.main.cc.in"
diff --git a/tests/unittests/lit_cases/test_tensorrt/test_einsum_sum_tensorrt.cc b/tests/unittests/lit_cases/test_tensorrt/test_einsum_sum_tensorrt.cc
@@ -17,13 +17,13 @@
 
 // clang-format off
 // Testing CXX Code Gen using ODLA API on tensorrt
-// RUN: %halo_compiler -target cxx -o %data_path/test_einsum_sum/test_data_set_0/output_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_sum/test_data_set_0/output_0.pb
-// RUN: %halo_compiler -target cxx -o %data_path/test_einsum_sum/test_data_set_0/input_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_sum/test_data_set_0/input_0.pb
-// RUN: %halo_compiler -target cxx -batch-size 1 %halo_compile_flags %data_path/test_einsum_sum/model.onnx -o %t.cc
+// RUN: %halo_compiler --disable-type-cast=false -target cxx -o %data_path/test_einsum_sum/test_data_set_0/output_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_sum/test_data_set_0/output_0.pb
+// RUN: %halo_compiler --disable-type-cast=false -target cxx -o %data_path/test_einsum_sum/test_data_set_0/input_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_sum/test_data_set_0/input_0.pb
+// RUN: %halo_compiler --disable-type-cast=false -target cxx -batch-size 1 %halo_compile_flags %data_path/test_einsum_sum/model.onnx -o %t.cc
 // RUN: %cxx -c -fPIC -o %t.o %t.cc -I%odla_path/include
 // RUN: %cxx -g %s %t.o %t.bin -I%T -I%odla_path/include -I%unittests_path -I%data_path/test_einsum_sum/test_data_set_0 %odla_link %device_link -lodla_tensorrt -o %t_tensorrt.exe -Wno-deprecated-declarations
-// RUN: %t_tensorrt.exe 0.0001 0 tensorrt %data_path/test_einsum_sum | FileCheck %s
+// RUN: ODLA_TRT_USE_EXPLICIT_BATCH=1 %t_tensorrt.exe 0.0001 0 tensorrt %data_path/test_einsum_sum | FileCheck %s
 // CHECK: Result Pass
 // clang-format on
-// XFAIL: *
+
 #include "test_einsum_sum_tensorrt.cc.tmp.main.cc.in"
diff --git a/tests/unittests/lit_cases/test_tensorrt/test_einsum_transpose_tensorrt.cc b/tests/unittests/lit_cases/test_tensorrt/test_einsum_transpose_tensorrt.cc
@@ -17,13 +17,13 @@
 
 // clang-format off
 // Testing CXX Code Gen using ODLA API on tensorrt
-// RUN: %halo_compiler -target cxx -o %data_path/test_einsum_transpose/test_data_set_0/output_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_transpose/test_data_set_0/output_0.pb
-// RUN: %halo_compiler -target cxx -o %data_path/test_einsum_transpose/test_data_set_0/input_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_transpose/test_data_set_0/input_0.pb
-// RUN: %halo_compiler -target cxx -batch-size 1 %halo_compile_flags %data_path/test_einsum_transpose/model.onnx -o %t.cc
+// RUN: %halo_compiler --disable-type-cast=false -target cxx -o %data_path/test_einsum_transpose/test_data_set_0/output_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_transpose/test_data_set_0/output_0.pb
+// RUN: %halo_compiler --disable-type-cast=false -target cxx -o %data_path/test_einsum_transpose/test_data_set_0/input_0.cc -x onnx -emit-data-as-c %data_path/test_einsum_transpose/test_data_set_0/input_0.pb
+// RUN: %halo_compiler --disable-type-cast=false -target cxx -batch-size 1 %halo_compile_flags %data_path/test_einsum_transpose/model.onnx -o %t.cc
 // RUN: %cxx -c -fPIC -o %t.o %t.cc -I%odla_path/include
 // RUN: %cxx -g %s %t.o %t.bin -I%T -I%odla_path/include -I%unittests_path -I%data_path/test_einsum_transpose/test_data_set_0 %odla_link %device_link -lodla_tensorrt -o %t_tensorrt.exe -Wno-deprecated-declarations
-// RUN: %t_tensorrt.exe 0.0001 0 tensorrt %data_path/test_einsum_transpose | FileCheck %s
+// RUN: ODLA_TRT_USE_EXPLICIT_BATCH=1 %t_tensorrt.exe 0.0001 0 tensorrt %data_path/test_einsum_transpose | FileCheck %s
 // CHECK: Result Pass
 // clang-format on
-// XFAIL: *
+
 #include "test_einsum_transpose_tensorrt.cc.tmp.main.cc.in"