Merge pull request #258 from Visual-Behavior/trt-warnings
Alobugdays - Trt warnings
anhtu293 authored Nov 17, 2022
2 parents 7b292ed + 5eee797 commit ffbc525
Showing 2 changed files with 26 additions and 4 deletions.
18 changes: 16 additions & 2 deletions alonet/torch2trt/TRTEngineBuilder.py
@@ -9,6 +9,7 @@


from typing import Dict, List, Tuple
from time import sleep


def GiB(val):
@@ -34,6 +35,7 @@ def __init__(
logger=None,
opt_profiles: Dict[str, Tuple[List[int]]] = None,
profiling_verbosity: str = "LAYER_NAMES_ONLY",
max_workspace_size: int = 1,
):
"""
Parameters
@@ -50,6 +52,8 @@
Used for INT8 quantization
opt_profiles : Dict[str, Tuple[List[int]]], by default None
Optimization profiles (one for each dynamic axis), with the minimum, optimum and maximum values.
max_workspace_size : int, by default 1
Maximum workspace size in GiB.
Raises
------
@@ -64,7 +68,7 @@ def __init__(
self.INT8_allowed = INT8_allowed
self.onnx_file_path = onnx_file_path
self.calibrator = calibrator
self.max_workspace_size = GiB(8)
self.max_workspace_size = GiB(max_workspace_size)
self.strict_type = strict_type
self.logger = logger
self.engine = None
@@ -156,11 +160,21 @@ def get_engine(self):
raise AttributeError("unknown profiling_verbosity")
# FP16
if self.FP16_allowed:
if not builder.platform_has_fast_fp16:
print("FP16 is not optimized in this platform. Check " +
"https://docs.nvidia.com/deeplearning/tensorrt/support-matrix/index.html#hardware-precision-matrix"
)
# Fast logs, sleep for logs readability
sleep(0.5)
config.set_flag(trt.BuilderFlag.FP16)
# INT8
if self.INT8_allowed:
if not builder.platform_has_fast_int8:
raise RuntimeError('INT8 not supported on this platform')
print("FP16 is not optimized in this platform. Check " +
"https://docs.nvidia.com/deeplearning/tensorrt/support-matrix/index.html#hardware-precision-matrix"
)
# Fast logs, sleep for logs readability
sleep(0.5)
config.set_quantization_flag(trt.QuantizationFlag.CALIBRATE_BEFORE_FUSION)
config.set_flag(trt.BuilderFlag.INT8)
config.int8_calibrator = self.calibrator
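
For reference, a minimal standalone sketch (not part of this commit) of the TensorRT capability flags the new warnings consult: platform_has_fast_fp16 and platform_has_fast_int8 are attributes of trt.Builder, and everything else here is only the boilerplate needed to construct one.

import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)

# The same checks the warnings above rely on: these flags report whether the
# GPU has fast (optimized) FP16 / INT8 kernels, not whether the precision is
# supported at all.
if not builder.platform_has_fast_fp16:
    print("FP16 is not optimized on this platform.")
if not builder.platform_has_fast_int8:
    print("INT8 is not optimized on this platform.")
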
12 changes: 10 additions & 2 deletions alonet/torch2trt/base_exporter.py
@@ -55,6 +55,7 @@ def __init__(
opt_profiles: Dict[str, Tuple[List[int]]] = None,
profiling_verbosity: int = 0,
calibrator=None,
max_workspace_size: int = 1,
**kwargs,
):
"""
@@ -93,6 +94,8 @@ def __init__(
1 : NONE (Do not print any layer information).
2 : DETAILED (Print detailed layer information including layer names and layer parameters).
Set to 2 for more layer details (precision, type, kernel ...) when calling the EngineInspector
max_workspace_size : int, by default 1
Maximum workspace size in GiB.
Raises
------
@@ -133,7 +136,13 @@ def __init__(
else:
trt_logger = trt.Logger(trt.Logger.WARNING)

self.engine_builder = TRTEngineBuilder(self.onnx_path, logger=trt_logger, opt_profiles=opt_profiles, calibrator=calibrator)
self.engine_builder = TRTEngineBuilder(
self.onnx_path,
logger=trt_logger,
calibrator=calibrator,
opt_profiles=opt_profiles,
max_workspace_size=max_workspace_size
)

if profiling_verbosity == 0:
self.engine_builder.profiling_verbosity = "LAYER_NAMES_ONLY"
@@ -148,7 +157,6 @@ def __init__(
pass
elif precision.lower() == "int8":
self.engine_builder.INT8_allowed = True
self.engine_builder.FP16_allowed = True
self.engine_builder.strict_type = True
elif precision.lower() == "fp16":
self.engine_builder.FP16_allowed = True
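
As an end-to-end illustration of the new workspace knob, here is a hedged sketch of building an engine with a caller-chosen workspace size instead of the previous hard-coded GiB(8). The import path mirrors the file location above; the ONNX path, the 4 GiB value, and setting FP16_allowed as an attribute (as base_exporter does) are illustrative assumptions, not code from this commit.

import tensorrt as trt
from alonet.torch2trt.TRTEngineBuilder import TRTEngineBuilder

builder = TRTEngineBuilder(
    "model.onnx",                    # hypothetical ONNX file
    logger=trt.Logger(trt.Logger.WARNING),
    max_workspace_size=4,            # 4 GiB, instead of the old fixed GiB(8)
)
builder.FP16_allowed = True          # prints the new warning if FP16 is not fast here
engine = builder.get_engine()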
