From 80080ba1057c176c7c969075539c61ec0f514085 Mon Sep 17 00:00:00 2001
From: Taha
Date: Thu, 17 Nov 2022 11:15:59 +0100
Subject: [PATCH 1/4] add runtime errors for non-optimized precisions

---
 alonet/torch2trt/TRTEngineBuilder.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/alonet/torch2trt/TRTEngineBuilder.py b/alonet/torch2trt/TRTEngineBuilder.py
index 1384f5e3..91e2e8fa 100644
--- a/alonet/torch2trt/TRTEngineBuilder.py
+++ b/alonet/torch2trt/TRTEngineBuilder.py
@@ -64,7 +64,7 @@ def __init__(
         self.INT8_allowed = INT8_allowed
         self.onnx_file_path = onnx_file_path
         self.calibrator = calibrator
-        self.max_workspace_size = GiB(8)
+        self.max_workspace_size = GiB(1)
         self.strict_type = strict_type
         self.logger = logger
         self.engine = None
@@ -156,11 +156,17 @@ def get_engine(self):
             raise AttributeError("unknown profiling_verbosity")
         # FP16
         if self.FP16_allowed:
+            if not builder.platform_has_fast_fp16:
+                raise RuntimeError("FP16 is not optimized on this platform. Check " +
+                    "https://docs.nvidia.com/deeplearning/tensorrt/support-matrix/index.html#hardware-precision-matrix"
+                )
             config.set_flag(trt.BuilderFlag.FP16)
         # INT8
         if self.INT8_allowed:
             if not builder.platform_has_fast_int8:
-                raise RuntimeError('INT8 not supported on this platform')
+                raise RuntimeError("INT8 is not optimized on this platform. Check " +
+                    "https://docs.nvidia.com/deeplearning/tensorrt/support-matrix/index.html#hardware-precision-matrix"
+                )
             config.set_quantization_flag(trt.QuantizationFlag.CALIBRATE_BEFORE_FUSION)
             config.set_flag(trt.BuilderFlag.INT8)
             config.int8_calibrator = self.calibrator

From a7624330ed0c14b66de552437ab23e86f0366dc8 Mon Sep 17 00:00:00 2001
From: Taha
Date: Thu, 17 Nov 2022 11:16:21 +0100
Subject: [PATCH 2/4] reduce max workspace size

---
 alonet/torch2trt/base_exporter.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/alonet/torch2trt/base_exporter.py b/alonet/torch2trt/base_exporter.py
index e83f5e30..ed99789b 100644
--- a/alonet/torch2trt/base_exporter.py
+++ b/alonet/torch2trt/base_exporter.py
@@ -148,7 +148,6 @@ def __init__(
             pass
         elif precision.lower() == "int8":
             self.engine_builder.INT8_allowed = True
-            self.engine_builder.FP16_allowed = True
             self.engine_builder.strict_type = True
         elif precision.lower() == "fp16":
             self.engine_builder.FP16_allowed = True

From 418775a106a4543b5668e7693170a63830b339ec Mon Sep 17 00:00:00 2001
From: Taha
Date: Thu, 17 Nov 2022 11:52:35 +0100
Subject: [PATCH 3/4] refactor: max_workspace_size to params

---
 alonet/torch2trt/TRTEngineBuilder.py |  5 ++++-
 alonet/torch2trt/base_exporter.py    | 11 ++++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/alonet/torch2trt/TRTEngineBuilder.py b/alonet/torch2trt/TRTEngineBuilder.py
index 91e2e8fa..550a34f1 100644
--- a/alonet/torch2trt/TRTEngineBuilder.py
+++ b/alonet/torch2trt/TRTEngineBuilder.py
@@ -34,6 +34,7 @@ def __init__(
         logger=None,
         opt_profiles: Dict[str, Tuple[List[int]]] = None,
         profiling_verbosity: str = "LAYER_NAMES_ONLY",
+        max_workspace_size: int = 1,
     ):
         """
         Parameters
@@ -50,6 +51,8 @@ def __init__(
             Used for INT8 quantization
         opt_profiles : Dict[str, Tuple[List[int]]], by default None
            Optimization profiles (one by each dynamic axis), with the minimum, minimum and maximum values.
+        max_workspace_size : int
+            Maximum workspace size in GiB.
 
         Raises
         ------
@@ -64,7 +67,7 @@ def __init__(
         self.INT8_allowed = INT8_allowed
         self.onnx_file_path = onnx_file_path
         self.calibrator = calibrator
-        self.max_workspace_size = GiB(1)
+        self.max_workspace_size = GiB(max_workspace_size)
         self.strict_type = strict_type
         self.logger = logger
         self.engine = None
diff --git a/alonet/torch2trt/base_exporter.py b/alonet/torch2trt/base_exporter.py
index ed99789b..75235a29 100644
--- a/alonet/torch2trt/base_exporter.py
+++ b/alonet/torch2trt/base_exporter.py
@@ -55,6 +55,7 @@ def __init__(
         opt_profiles: Dict[str, Tuple[List[int]]] = None,
         profiling_verbosity: int = 0,
         calibrator=None,
+        max_workspace_size: int = 1,
         **kwargs,
     ):
         """
@@ -93,6 +94,8 @@ def __init__(
             1 : NONE (Do not print any layer information).
             2 : DETAILED : (Print detailed layer information including layer names and layer parameters).
             Set to 2 for more layers details (preicision, type, kernel ...) when calling the EngineInspector
+        max_workspace_size : int
+            Maximum workspace size in GiB.
 
         Raises
         ------
@@ -133,7 +136,13 @@ def __init__(
         else:
             trt_logger = trt.Logger(trt.Logger.WARNING)
 
-        self.engine_builder = TRTEngineBuilder(self.onnx_path, logger=trt_logger, opt_profiles=opt_profiles, calibrator=calibrator)
+        self.engine_builder = TRTEngineBuilder(
+            self.onnx_path,
+            logger=trt_logger,
+            calibrator=calibrator,
+            opt_profiles=opt_profiles,
+            max_workspace_size=max_workspace_size
+        )
 
         if profiling_verbosity == 0:
             self.engine_builder.profiling_verbosity = "LAYER_NAMES_ONLY"

From 5eee7978eee1b166c58ba0205ef6e42b4bb1f29c Mon Sep 17 00:00:00 2001
From: Taha
Date: Thu, 17 Nov 2022 11:56:47 +0100
Subject: [PATCH 4/4] refactor: from error to warning

---
 alonet/torch2trt/TRTEngineBuilder.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/alonet/torch2trt/TRTEngineBuilder.py b/alonet/torch2trt/TRTEngineBuilder.py
index 550a34f1..482216d3 100644
--- a/alonet/torch2trt/TRTEngineBuilder.py
+++ b/alonet/torch2trt/TRTEngineBuilder.py
@@ -9,6 +9,7 @@
 
 
 from typing import Dict, List, Tuple
+from time import sleep
 
 
 def GiB(val):
@@ -160,16 +161,20 @@ def get_engine(self):
         # FP16
         if self.FP16_allowed:
             if not builder.platform_has_fast_fp16:
-                raise RuntimeError("FP16 is not optimized on this platform. Check " +
+                print("FP16 is not optimized on this platform. Check " +
                     "https://docs.nvidia.com/deeplearning/tensorrt/support-matrix/index.html#hardware-precision-matrix"
                 )
+                # Logs move fast; sleep briefly so the warning stays readable
+                sleep(0.5)
             config.set_flag(trt.BuilderFlag.FP16)
         # INT8
         if self.INT8_allowed:
             if not builder.platform_has_fast_int8:
-                raise RuntimeError("INT8 is not optimized on this platform. Check " +
+                print("INT8 is not optimized on this platform. Check " +
                     "https://docs.nvidia.com/deeplearning/tensorrt/support-matrix/index.html#hardware-precision-matrix"
                 )
+                # Logs move fast; sleep briefly so the warning stays readable
+                sleep(0.5)
             config.set_quantization_flag(trt.QuantizationFlag.CALIBRATE_BEFORE_FUSION)
             config.set_flag(trt.BuilderFlag.INT8)
             config.int8_calibrator = self.calibrator
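
Usage sketch (not a definitive example): the snippet below shows how the builder might be called once the series is applied. It assumes the import path alonet.torch2trt.TRTEngineBuilder and that get_engine() returns the built engine; the ONNX file name is hypothetical. The precision flag is toggled as an attribute, mirroring what base_exporter.py does, and max_workspace_size is the new per-instance GiB budget.

    from alonet.torch2trt.TRTEngineBuilder import TRTEngineBuilder

    # Hypothetical model path; request a 2 GiB builder workspace instead of the 1 GiB default.
    builder = TRTEngineBuilder("model.onnx", max_workspace_size=2)
    builder.FP16_allowed = True  # same attribute base_exporter sets for precision="fp16"
    engine = builder.get_engine()  # after PATCH 4, prints a warning instead of raising if FP16 is not optimized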