Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable >2GB models + allow model paths to be passed for generate_artifacts API #20958

Merged
merged 14 commits into from
Jun 21, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions orttraining/orttraining/python/training/artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@


def generate_artifacts(
model: onnx.ModelProto,
model: Union[onnx.ModelProto, str],
requires_grad: Optional[List[str]] = None,
frozen_params: Optional[List[str]] = None,
loss: Optional[Union[LossType, onnxblock.Block]] = None,
Expand All @@ -61,7 +61,8 @@
All generated ModelProtos will use the same opsets defined by *model*.

Args:
model: The base model to be used for gradient graph generation.
model: The base model or path to the base model to be used for gradient graph generation. For models >2GB,
Fixed Show fixed Hide fixed
use the path to the base model.
requires_grad: List of names of model parameters that require gradient computation
frozen_params: List of names of model parameters that should be frozen.
loss: The loss function enum or onnxblock to be used for training. If None, no loss node is added to the graph.
Expand Down Expand Up @@ -159,18 +160,20 @@
training_model = None
eval_model = None
model_params = None
loaded_model = onnx.load(model) if isinstance(model, str) else model
model_path = model if isinstance(model, str) else None

custom_op_library_path = None
if custom_op_library is not None:
logging.info("Custom op library provided: %s", custom_op_library)
custom_op_library_path = pathlib.Path(custom_op_library)

with onnxblock.base(model), (
with onnxblock.base(loaded_model, model_path), (
onnxblock.custom_op_library(custom_op_library_path)
if custom_op_library is not None
else contextlib.nullcontext()
):
_ = training_block(*[output.name for output in model.graph.output])
_ = training_block(*[output.name for output in loaded_model.graph.output])
training_model, eval_model = training_block.to_model_proto()
model_params = training_block.parameters()

Expand Down Expand Up @@ -220,7 +223,7 @@
return

opset_version = None
for domain in model.opset_import:
for domain in loaded_model.opset_import:
if domain.domain == "" or domain.domain == "ai.onnx":
opset_version = domain.version
break
Expand Down
5 changes: 4 additions & 1 deletion orttraining/orttraining/python/training/onnxblock/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,10 @@ def __call__(self, *args, **kwargs):

output = self.build(*args, **kwargs)

onnx.checker.check_model(self.base, True)
if accessor._GLOBAL_ACCESSOR.has_path:
onnx.checker.check_model(accessor._GLOBAL_ACCESSOR.path, True)
carzh marked this conversation as resolved.
Show resolved Hide resolved
else:
onnx.checker.check_model(self.base, True)

return output

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ class ModelAccessor:

Attributes:
model: The onnx model that is manipulated by the onnx blocks.
model_path: The path to the base model. Can be None.
"""

def __init__(self, model: onnx.ModelProto):
def __init__(self, model: onnx.ModelProto, model_path: str):
self._model = model
self._path = model_path

@property
def model(self) -> onnx.ModelProto:
Expand All @@ -30,6 +32,22 @@ def model(self) -> onnx.ModelProto:
)
return self._model

@property
def path(self) -> str:
"""ModelAccessor property that gets the path to the base model."""

if self._path is None:
raise RuntimeError(
"The path to the onnx model was not set. Please use the context manager onnxblock.onnx_model to create the model and pass in a string."
)
return self._path

@property
def has_path(self) -> bool:
"""Returns True if ModelAccessor has a path to a model, False otherwise."""

return self._path is not None


# These variable resides in the global namespace.
# Different methods can access this global model and manipulate it.
Expand All @@ -39,7 +57,7 @@ def model(self) -> onnx.ModelProto:


@contextmanager
def base(model: onnx.ModelProto):
def base(model: onnx.ModelProto, model_path: str):
"""Registers the base model to be manipulated by the onnx blocks.

Example:
Expand All @@ -53,6 +71,7 @@ def base(model: onnx.ModelProto):

Args:
model: The base model to be manipulated by the onnx blocks.
model_path: The path to the base model. None if there is no model path to pass in.

Returns:
ModelAccessor: The model accessor that contains the modified model.
Expand All @@ -69,7 +88,7 @@ def base(model: onnx.ModelProto):
"model from scratch."
)

_GLOBAL_ACCESSOR = ModelAccessor(model_clone)
_GLOBAL_ACCESSOR = ModelAccessor(model_clone, model_path)
try:
yield _GLOBAL_ACCESSOR
finally:
Expand Down Expand Up @@ -112,7 +131,7 @@ def empty_base(opset_version: int | None = None):
)
)

_GLOBAL_ACCESSOR = ModelAccessor(model)
_GLOBAL_ACCESSOR = ModelAccessor(model, None)
try:
yield _GLOBAL_ACCESSOR
finally:
Expand Down
15 changes: 13 additions & 2 deletions orttraining/orttraining/python/training/onnxblock/onnxblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,12 @@ def __call__(self, *args, **kwargs):

output = self.build(*args, **kwargs)

self._model = onnx.shape_inference.infer_shapes(accessor._GLOBAL_ACCESSOR.model)
if accessor._GLOBAL_ACCESSOR.has_path:
onnx.shape_inference.infer_shapes_path(accessor._GLOBAL_ACCESSOR.path)
# the shape-inferred model is written back to the original path
self._model = onnx.load(accessor._GLOBAL_ACCESSOR.path)
else:
self._model = onnx.shape_inference.infer_shapes(accessor._GLOBAL_ACCESSOR.model)

_graph_utils.register_graph_outputs(self._model, output)

Expand Down Expand Up @@ -187,7 +192,13 @@ def __call__(self, *args, **kwargs):

output = self.build(*args, **kwargs)

model = onnx.shape_inference.infer_shapes(accessor._GLOBAL_ACCESSOR.model)
model = None
if accessor._GLOBAL_ACCESSOR.has_path:
onnx.shape_inference.infer_shapes_path(accessor._GLOBAL_ACCESSOR.path)
# the shape-inferred model is written back to the original path
model = onnx.load(accessor._GLOBAL_ACCESSOR.path)
else:
model = onnx.shape_inference.infer_shapes(accessor._GLOBAL_ACCESSOR.model)

_graph_utils.register_graph_outputs(model, output)

Expand Down
Loading