Add torch._export.aot_load (#2119)
Summary:
Pull Request resolved: #2119

X-link: pytorch/pytorch#117610

Add a torch._export.aot_load API that can load an AOTInductor-compiled model.so into a Python executable.

Reviewed By: khabinov, angelayi

Differential Revision: D52825456

fbshipit-source-id: 1cc2e93f4621863d2535360edca2d72a305bafdf
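
For context, a minimal sketch of the round trip this enables, pairing the new API with the existing torch._export.aot_compile. The module, shapes, and device below are illustrative, and the loaded module is assumed to take its inputs positionally:

import torch
import torch._export

class Add(torch.nn.Module):
    def forward(self, x, y):
        return x + y

device = "cuda" if torch.cuda.is_available() else "cpu"
model = Add().to(device)
example_args = (
    torch.randn(8, 8, device=device),
    torch.randn(8, 8, device=device),
)

# Ahead-of-time compile the model into a shared library on disk.
so_path = torch._export.aot_compile(model, example_args)

# Load the compiled model.so back into this Python process and run it
# (assumed calling convention: inputs passed positionally).
optimized = torch._export.aot_load(so_path, device)
out = optimized(*example_args)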
desertfire authored and facebook-github-bot committed Jan 18, 2024
1 parent 6a8b941 commit e85d944
1 changed file: userbenchmark/dynamo/dynamobench/common.py (3 additions, 18 deletions)
@@ -51,7 +51,6 @@
 import torch._dynamo.utils
 import torch._export
 import torch.distributed
-import torch.fx._pytree as fx_pytree
 import torch.multiprocessing as mp
 from scipy.stats import gmean, ttest_ind
 from torch._dynamo.profiler import fx_insert_profiling, Profiler
@@ -1126,13 +1125,7 @@ def load(cls, model, example_inputs, device):
         _register_dataclass_output_as_pytree(example_outputs)

         so_path = torch._export.aot_compile(model, example_args, example_kwargs)
-
-        runner = (
-            torch._C._aoti.AOTIModelContainerRunnerCpu(so_path, 1)
-            if device == "cpu"
-            else torch._C._aoti.AOTIModelContainerRunnerCuda(so_path, 1)
-        )
-        cls.cache[key] = runner
+        cls.cache[key] = torch._export.aot_load(so_path, device)

         return cls.cache[key]

@@ -1152,19 +1145,11 @@ def opt_export(_, example_inputs):


 def export_aot_inductor(model, example_inputs, device):
-    runner = AOTInductorModelCache.load(model, example_inputs, device)
-    call_spec = runner.get_call_spec()
-    in_spec = pytree.treespec_loads(call_spec[0])
-    out_spec = pytree.treespec_loads(call_spec[1])
+    optimized = AOTInductorModelCache.load(model, example_inputs, device)

     def opt_aot_inductor(_, example_inputs, collect_outputs=False):
         example_args, example_kwargs = _normalize_bench_inputs(example_inputs)
-
-        flat_inputs = fx_pytree.tree_flatten_spec(
-            (example_args, example_kwargs), in_spec
-        )
-        flat_outputs = runner.run(flat_inputs)
-        return pytree.tree_unflatten(flat_outputs, out_spec)
+        return optimized(example_args, example_kwargs)

     return opt_aot_inductor

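The deleted lines above spell out the boilerplate that torch._export.aot_load now encapsulates: constructing a device-specific AOTIModelContainerRunner, flattening structured inputs with the serialized pytree specs, and unflattening the outputs. A hedged reconstruction assembled from the removed code (the helper name load_manually is hypothetical):

import torch
import torch.fx._pytree as fx_pytree
import torch.utils._pytree as pytree

def load_manually(so_path, device):
    # Device-specific runner around the compiled model container (old code path).
    runner = (
        torch._C._aoti.AOTIModelContainerRunnerCpu(so_path, 1)
        if device == "cpu"
        else torch._C._aoti.AOTIModelContainerRunnerCuda(so_path, 1)
    )

    # The input/output pytree specs are stored alongside the compiled model.
    call_spec = runner.get_call_spec()
    in_spec = pytree.treespec_loads(call_spec[0])
    out_spec = pytree.treespec_loads(call_spec[1])

    def run(example_args, example_kwargs):
        # Flatten structured inputs, run the compiled model, restore the output structure.
        flat_inputs = fx_pytree.tree_flatten_spec(
            (example_args, example_kwargs), in_spec
        )
        flat_outputs = runner.run(flat_inputs)
        return pytree.tree_unflatten(flat_outputs, out_spec)

    return run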
