Update to allow large models to be checked for mobile support. (microsoft#18357)

### Description
Update usability checker and related infrastructure to support checking
models > 2GB.
- Add ability to set a flag to keep initializers as external data
  - we optimize the model as part of the checking, so we need to write out a new copy of it
- Handle ONNX shape inferencing silently failing for large models (see the sketch after this list)
  - use the API that supports large models, which requires writing the model to a new file
  - automate cleanup of that copy of the model
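
The core pattern behind the shape-inferencing fix (a minimal standalone sketch under an assumed function name, not the committed `ModelProtoWithShapeInfo` helper shown in the diff below) is to detect the silent failure of the in-memory `onnx.shape_inference.infer_shapes` call and fall back to the file-based `infer_shapes_path` API, then remove the temporary copy:

```python
import pathlib

import onnx


def infer_shapes_for_large_model(model_path: pathlib.Path) -> onnx.ModelProto:
    """Sketch: run shape inferencing, falling back to the file-based API for models > 2GB."""
    model = onnx.load(str(model_path))
    inferred = onnx.shape_inference.infer_shapes(model, strict_mode=True)

    # infer_shapes can fail silently for models > 2GB: the returned model comes back with an empty graph
    if len(model.graph.node) > 0 and len(inferred.graph.node) == 0:
        tmp_path = model_path.with_suffix(".temp_with_shapeinf.onnx")
        try:
            # the file-based API supports large models but requires writing the model to a new file
            onnx.shape_inference.infer_shapes_path(str(model_path), str(tmp_path), strict_mode=True)
            inferred = onnx.load(str(tmp_path))
        finally:
            tmp_path.unlink(missing_ok=True)  # automate cleanup of the temporary copy
    return inferred
```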

### Motivation and Context
Allow analysis of LLMs to determine gaps for mobile usage.

---------

Co-authored-by: Edward Chen <[email protected]>
2 people authored and kleiti committed Mar 22, 2024
1 parent 62e87ac commit 0907185
Showing 3 changed files with 54 additions and 11 deletions.
@@ -10,9 +10,8 @@
 import sys
 
 import onnx
-from onnx import shape_inference
 
-from ..onnx_model_utils import get_opsets_imported
+from ..onnx_model_utils import ModelProtoWithShapeInfo, get_opsets_imported
 from ..reduced_build_config_parser import parse_config
 
 cpp_to_tensorproto_type = {
@@ -265,15 +264,13 @@ def run_check(model_path: pathlib.Path, mobile_pkg_build_config: pathlib.Path, l
     )
 
     model_file = model_path.resolve(strict=True)
-    model = onnx.load(str(model_file))
 
     # we need to run shape inferencing to populate that type info for node outputs.
     # we will get warnings if the model uses ORT contrib ops (ONNX does not have shape inferencing for those),
     # and shape inferencing will be lost downstream of those.
     # TODO: add support for checking ORT format model as it will have full type/shape info for all nodes
-    model_with_type_info = shape_inference.infer_shapes(model)
-
-    return run_check_with_model(model_with_type_info, mobile_pkg_build_config, logger)
+    model_wrapper = ModelProtoWithShapeInfo(model_file)
+    return run_check_with_model(model_wrapper.model_with_shape_info, mobile_pkg_build_config, logger)
 
 
 def main():
11 changes: 6 additions & 5 deletions tools/python/util/mobile_helpers/usability_checker.py
@@ -13,6 +13,7 @@
 import onnx
 
 from ..onnx_model_utils import (
+    ModelProtoWithShapeInfo,
     get_producer_consumer_maps,
     is_fixed_size_tensor,
     iterate_graph_per_graph_func,
@@ -464,9 +465,9 @@ def check_shapes(graph: onnx.GraphProto, logger: Optional[logging.Logger] = None
     return dynamic_inputs, num_dynamic_values
 
 
-def checker(model_path, logger: logging.Logger):
-    model = onnx.load(model_path)
-    model_with_shape_info = onnx.shape_inference.infer_shapes(model)
+def checker(model_path: pathlib.Path, logger: logging.Logger):
+    model_with_shape_info_wrapper = ModelProtoWithShapeInfo(model_path)
+    model_with_shape_info = model_with_shape_info_wrapper.model_with_shape_info
 
     # create lookup map for efficiency
     value_to_shape = {}
@@ -541,10 +542,10 @@ def analyze_model(model_path: pathlib.Path, skip_optimize: bool = False, logger:
     with tempfile.TemporaryDirectory() as tmp:
         if not skip_optimize:
             tmp_path = pathlib.Path(tmp) / model_path.name
-            optimize_model(model_path, tmp_path)
+            optimize_model(model_path, tmp_path, use_external_initializers=True)
             model_path = tmp_path
 
-        try_eps = checker(str(model_path.resolve(strict=True)), logger)
+        try_eps = checker(model_path.resolve(strict=True), logger)
 
     return try_eps
 
45 changes: 45 additions & 0 deletions tools/python/util/onnx_model_utils.py
@@ -95,6 +95,7 @@ def optimize_model(
     output_path: pathlib.Path,
     level: ort.GraphOptimizationLevel = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC,
     log_level: int = 3,
+    use_external_initializers: bool = False,
 ):
     """
     Optimize an ONNX model using ONNX Runtime to the specified level
@@ -103,12 +104,25 @@
     :param level: onnxruntime.GraphOptimizationLevel to use. Default is ORT_ENABLE_BASIC.
     :param log_level: Log level. Defaults to Error (3) so we don't get output about unused initializers being removed.
                       Warning (2) or Info (1) may be desirable in some scenarios.
+    :param use_external_initializers: Set flag to write initializers to an external file. Required if model > 2GB.
+                                      Requires onnxruntime 1.17+
     """
     so = ort.SessionOptions()
     so.optimized_model_filepath = str(output_path.resolve())
     so.graph_optimization_level = level
     so.log_severity_level = log_level
 
+    # save using external initializers so models > 2 GB are handled
+    if use_external_initializers:
+        major, minor, rest = ort.__version__.split(".", 3)
+        if (int(major), int(minor)) >= (1, 17):
+            so.add_session_config_entry("session.optimized_model_external_initializers_file_name", "external_data.pb")
+        else:
+            raise ValueError(
+                "ONNX Runtime 1.17 or higher required to save initializers as external data when optimizing model. "
+                f"Current ONNX Runtime version is {ort.__version__}"
+            )
+
     # create session to optimize. this will write the updated model to output_path
     _ = ort.InferenceSession(str(model_path.resolve(strict=True)), so, providers=["CPUExecutionProvider"])
 
@@ -366,3 +380,34 @@ def get_optimization_level(level):
         return ort.GraphOptimizationLevel.ORT_ENABLE_ALL
 
     raise ValueError("Invalid optimization level of " + level)
+
+
+class ModelProtoWithShapeInfo:
+    """
+    Class to load an ONNX model and run shape inferencing on it to populate the ValueInfo.
+    The model_with_shape_info property will contain the updated model.
+    If the model is > 2GB and uses external data a temporary file is required to run shape inferencing successfully.
+    This helper class handles automatic removal of the temporary file.
+    """
+
+    def __init__(self, model_path: pathlib.Path):
+        """
+        :param model_path: Path to ONNX model to load and run shape inferencing on.
+        """
+
+        self.model_path = model_path
+
+        model = onnx.load(str(model_path))
+        self.model_with_shape_info = onnx.shape_inference.infer_shapes(model, strict_mode=True)
+
+        # ONNX has a silent failure from the call to infer_shapes when the model is > 2GB.
+        # We detect that by checking the nodes in the returned model.
+        self._tmp_model_path = None
+        if len(model.graph.node) > 0 and len(self.model_with_shape_info.graph.node) == 0:
+            self._tmp_model_path = pathlib.Path(model_path).with_suffix(".temp_with_shapeinf.onnx")
+            onnx.shape_inference.infer_shapes_path(str(model_path), str(self._tmp_model_path), strict_mode=True)
+            self.model_with_shape_info = onnx.load(str(self._tmp_model_path))
+
+    def __del__(self):
+        if self._tmp_model_path:
+            self._tmp_model_path.unlink(missing_ok=True)
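
Taken together, a hedged end-to-end sketch of how these pieces might be used (the import path and file names below are placeholders; in-repo callers use relative imports from `onnx_model_utils`):

```python
import pathlib

# import path assumed for illustration only
from util.onnx_model_utils import ModelProtoWithShapeInfo, optimize_model

model_path = pathlib.Path("large_model.onnx")                 # placeholder path
optimized_path = pathlib.Path("large_model.optimized.onnx")   # placeholder path

# optimize first; external initializers keep the saved model under the 2GB protobuf limit
optimize_model(model_path, optimized_path, use_external_initializers=True)

# wrap the optimized model; shape inferencing falls back to the file-based API if needed,
# and any temporary *.temp_with_shapeinf.onnx copy is removed when the wrapper is garbage collected
wrapper = ModelProtoWithShapeInfo(optimized_path)
model_with_shapes = wrapper.model_with_shape_info
```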
