From 80080ba1057c176c7c969075539c61ec0f514085 Mon Sep 17 00:00:00 2001
From: Taha
Date: Thu, 17 Nov 2022 11:15:59 +0100
Subject: [PATCH 1/4] add runtime errors for non-optimized precisions

---
 alonet/torch2trt/TRTEngineBuilder.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/alonet/torch2trt/TRTEngineBuilder.py b/alonet/torch2trt/TRTEngineBuilder.py
index 1384f5e3..91e2e8fa 100644
--- a/alonet/torch2trt/TRTEngineBuilder.py
+++ b/alonet/torch2trt/TRTEngineBuilder.py
@@ -64,7 +64,7 @@ def __init__(
         self.INT8_allowed = INT8_allowed
         self.onnx_file_path = onnx_file_path
         self.calibrator = calibrator
-        self.max_workspace_size = GiB(8)
+        self.max_workspace_size = GiB(1)
         self.strict_type = strict_type
         self.logger = logger
         self.engine = None
@@ -156,11 +156,17 @@ def get_engine(self):
             raise AttributeError("unknown profiling_verbosity")
         # FP16
         if self.FP16_allowed:
+            if not builder.platform_has_fast_fp16:
+                raise RuntimeError("FP16 is not optimized on this platform. Check " +
+                    "https://docs.nvidia.com/deeplearning/tensorrt/support-matrix/index.html#hardware-precision-matrix"
+                )
             config.set_flag(trt.BuilderFlag.FP16)
         # INT8
         if self.INT8_allowed:
             if not builder.platform_has_fast_int8:
-                raise RuntimeError('INT8 not supported on this platform')
+                raise RuntimeError("INT8 is not optimized on this platform. Check " +
+                    "https://docs.nvidia.com/deeplearning/tensorrt/support-matrix/index.html#hardware-precision-matrix"
+                )
             config.set_quantization_flag(trt.QuantizationFlag.CALIBRATE_BEFORE_FUSION)
             config.set_flag(trt.BuilderFlag.INT8)
             config.int8_calibrator = self.calibrator

From a7624330ed0c14b66de552437ab23e86f0366dc8 Mon Sep 17 00:00:00 2001
From: Taha
Date: Thu, 17 Nov 2022 11:16:21 +0100
Subject: [PATCH 2/4] reduce max workspace size

---
 alonet/torch2trt/base_exporter.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/alonet/torch2trt/base_exporter.py b/alonet/torch2trt/base_exporter.py
index e83f5e30..ed99789b 100644
--- a/alonet/torch2trt/base_exporter.py
+++ b/alonet/torch2trt/base_exporter.py
@@ -148,7 +148,6 @@ def __init__(
             pass
         elif precision.lower() == "int8":
             self.engine_builder.INT8_allowed = True
-            self.engine_builder.FP16_allowed = True
             self.engine_builder.strict_type = True
         elif precision.lower() == "fp16":
             self.engine_builder.FP16_allowed = True

From 418775a106a4543b5668e7693170a63830b339ec Mon Sep 17 00:00:00 2001
From: Taha
Date: Thu, 17 Nov 2022 11:52:35 +0100
Subject: [PATCH 3/4] refactor: max_workspace_size to params

---
 alonet/torch2trt/TRTEngineBuilder.py |  5 ++++-
 alonet/torch2trt/base_exporter.py    | 11 ++++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/alonet/torch2trt/TRTEngineBuilder.py b/alonet/torch2trt/TRTEngineBuilder.py
index 91e2e8fa..550a34f1 100644
--- a/alonet/torch2trt/TRTEngineBuilder.py
+++ b/alonet/torch2trt/TRTEngineBuilder.py
@@ -34,6 +34,7 @@ def __init__(
         logger=None,
         opt_profiles: Dict[str, Tuple[List[int]]] = None,
         profiling_verbosity: str = "LAYER_NAMES_ONLY",
+        max_workspace_size: int = 1,
     ):
         """
         Parameters
@@ -50,6 +51,8 @@ def __init__(
             Used for INT8 quantization
         opt_profiles : Dict[str, Tuple[List[int]]], by default None
            Optimization profiles (one by each dynamic axis), with the minimum, minimum and maximum values.
+        max_workspace_size : int
+            Maximum workspace size in GiB.
 
         Raises
         ------
@@ -64,7 +67,7 @@ def __init__(
         self.INT8_allowed = INT8_allowed
         self.onnx_file_path = onnx_file_path
         self.calibrator = calibrator
-        self.max_workspace_size = GiB(1)
+        self.max_workspace_size = GiB(max_workspace_size)
         self.strict_type = strict_type
         self.logger = logger
         self.engine = None
diff --git a/alonet/torch2trt/base_exporter.py b/alonet/torch2trt/base_exporter.py
index ed99789b..75235a29 100644
--- a/alonet/torch2trt/base_exporter.py
+++ b/alonet/torch2trt/base_exporter.py
@@ -55,6 +55,7 @@ def __init__(
         opt_profiles: Dict[str, Tuple[List[int]]] = None,
         profiling_verbosity: int = 0,
         calibrator=None,
+        max_workspace_size: int = 1,
         **kwargs,
     ):
         """
@@ -93,6 +94,8 @@ def __init__(
             1 : NONE (Do not print any layer information).
             2 : DETAILED : (Print detailed layer information including layer names and layer parameters).
             Set to 2 for more layers details (preicision, type, kernel ...) when calling the EngineInspector
+        max_workspace_size : int
+            Maximum workspace size in GiB.
 
         Raises
         ------
@@ -133,7 +136,13 @@ def __init__(
         else:
             trt_logger = trt.Logger(trt.Logger.WARNING)
 
-        self.engine_builder = TRTEngineBuilder(self.onnx_path, logger=trt_logger, opt_profiles=opt_profiles, calibrator=calibrator)
+        self.engine_builder = TRTEngineBuilder(
+            self.onnx_path,
+            logger=trt_logger,
+            calibrator=calibrator,
+            opt_profiles=opt_profiles,
+            max_workspace_size=max_workspace_size
+        )
 
         if profiling_verbosity == 0:
             self.engine_builder.profiling_verbosity = "LAYER_NAMES_ONLY"

From 5eee7978eee1b166c58ba0205ef6e42b4bb1f29c Mon Sep 17 00:00:00 2001
From: Taha
Date: Thu, 17 Nov 2022 11:56:47 +0100
Subject: [PATCH 4/4] refactor: from error to warning

---
 alonet/torch2trt/TRTEngineBuilder.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/alonet/torch2trt/TRTEngineBuilder.py b/alonet/torch2trt/TRTEngineBuilder.py
index 550a34f1..482216d3 100644
--- a/alonet/torch2trt/TRTEngineBuilder.py
+++ b/alonet/torch2trt/TRTEngineBuilder.py
@@ -9,6 +9,7 @@
 
 
 from typing import Dict, List, Tuple
+from time import sleep
 
 
 def GiB(val):
@@ -160,16 +161,20 @@ def get_engine(self):
         # FP16
         if self.FP16_allowed:
             if not builder.platform_has_fast_fp16:
-                raise RuntimeError("FP16 is not optimized on this platform. Check " +
+                print("FP16 is not optimized on this platform. Check " +
                     "https://docs.nvidia.com/deeplearning/tensorrt/support-matrix/index.html#hardware-precision-matrix"
                 )
+                # Logs move fast; sleep briefly so the warning stays readable
+                sleep(0.5)
             config.set_flag(trt.BuilderFlag.FP16)
         # INT8
         if self.INT8_allowed:
             if not builder.platform_has_fast_int8:
-                raise RuntimeError("INT8 is not optimized on this platform. Check " +
+                print("INT8 is not optimized on this platform. Check " +
                     "https://docs.nvidia.com/deeplearning/tensorrt/support-matrix/index.html#hardware-precision-matrix"
                 )
+                # Logs move fast; sleep briefly so the warning stays readable
+                sleep(0.5)
             config.set_quantization_flag(trt.QuantizationFlag.CALIBRATE_BEFORE_FUSION)
             config.set_flag(trt.BuilderFlag.INT8)
             config.int8_calibrator = self.calibrator
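
Usage sketch (not a definitive example): the snippet below shows how the builder might be called once the series is applied. It assumes the import path alonet.torch2trt.TRTEngineBuilder and that get_engine() returns the built engine; the ONNX file name is hypothetical. The precision flag is toggled as an attribute, mirroring what base_exporter.py does, and max_workspace_size is the new per-instance GiB budget.

    from alonet.torch2trt.TRTEngineBuilder import TRTEngineBuilder

    # Hypothetical model path; request a 2 GiB builder workspace instead of the 1 GiB default.
    builder = TRTEngineBuilder("model.onnx", max_workspace_size=2)
    builder.FP16_allowed = True  # same attribute base_exporter sets for precision="fp16"
    engine = builder.get_engine()  # after PATCH 4, prints a warning instead of raising if FP16 is not optimized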