Fix CI tests #1993

Merged · 37 commits · Jan 4, 2025
5 changes: 3 additions & 2 deletions .github/workflows/lint.yaml
@@ -51,11 +51,12 @@ jobs:
           python-version: "3.10"
       - name: Install ONNXScript
         run: |
-          # The code is from azure-pipelines.yml
           # Install dependencies
           python -m pip install --upgrade pip
           python -m pip install --upgrade setuptools
-          python -m pip install -q -r requirements-dev.txt
+          python -m pip install -r requirements-dev.txt
+          # FIXME: numpy 2.2 has some typing changes that break the mypy CI but it's otherwise fine
+          python -m pip install "numpy<2.2"
           # Install packages
           python -m pip install -e .
           lintrunner init
44 changes: 5 additions & 39 deletions .github/workflows/main.yaml
@@ -26,27 +26,22 @@ jobs:
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
         name:
+          - py312-torch-nightly
           - py312
           - py311
-          - py311-torch-nightly
           - py311-onnx-weekly
           - py311-ort-nightly
-          - py311-experimental-torchlib-tracing
           - py310
-          - py39
         include:
           - name: py312
             python-version: "3.12"
             nox-tag: test build
           - name: py311
             python-version: "3.11"
-            nox-tag: test build
+            nox-tag: test
           - name: py310
             python-version: "3.10"
             nox-tag: test
-          - name: py39
-            python-version: "3.9"
-            nox-tag: test
-          - name: py311-torch-nightly
-            python-version: "3.11"
-            nox-tag: test-torch-nightly
+          - name: py312-torch-nightly
+            python-version: "3.12"
+            nox-tag: test-torch-nightly
@@ -56,9 +51,6 @@ jobs:
           - name: py311-ort-nightly
             python-version: "3.11"
             nox-tag: test-ort-nightly
-          - name: py311-experimental-torchlib-tracing
-            python-version: "3.11"
-            nox-tag: test-experimental-torchlib-tracing
     runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v4
@@ -92,32 +84,6 @@ jobs:
           name: Error reports (${{ matrix.name }}-${{ matrix.os }})
           path: error_reports

-  dort:
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ubuntu-latest]
-        transformers: ["4.37.2", "4.41.2", "4.42.3"]
-        torch: ["release", "nightly"]
-        python_version: ["3.11"]
-        nox-tag: ["test-dort"]
-        name:
-          - dort
-    runs-on: ${{ matrix.os }}
-    steps:
-      - uses: actions/checkout@v4
-      - name: Setup Python ${{ matrix.python_version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python_version }}
-      - name: Install nox
-        run: python -m pip install nox
-      - name: Pull Test Data
-        run: git lfs pull
-      - run: |
-          nox -t ${{ matrix.nox-tag }} --forcecolor -- ${{ matrix.torch }} ${{ matrix.transformers }}
-        name: Run tests
-
   build_docs:
     strategy:
       fail-fast: false
38 changes: 5 additions & 33 deletions noxfile.py
@@ -30,10 +30,10 @@
     "typing_extensions",
     "ml-dtypes",
 )
-ONNX = "onnx==1.16"
-ONNX_RUNTIME = "onnxruntime==1.17.1"
-PYTORCH = "torch==2.3.1"
-TORCHVISON = "torchvision==0.18.1"
+ONNX = "onnx==1.17"
+ONNX_RUNTIME = "onnxruntime==1.20.1"
+PYTORCH = "torch==2.4.1"
+TORCHVISON = "torchvision==0.19.1"
 TRANSFORMERS = "transformers==4.37.2"
 ONNX_RUNTIME_NIGHTLY_DEPENDENCIES = (
     "flatbuffers",
@@ -104,6 +104,7 @@ def test_ort_nightly(session):
         PYTORCH,
         TORCHVISON,
         ONNX,
+        TRANSFORMERS,
         *ONNX_RUNTIME_NIGHTLY_DEPENDENCIES,
     )
     session.install("-r", "requirements/ci/requirements-ort-nightly.txt")
@@ -132,32 +133,3 @@ def test_experimental_torchlib_tracing(session):
         *session.posargs,
         env={"TORCHLIB_EXPERIMENTAL_PREFER_TRACING": "1"},
     )
-
-
-@nox.session(tags=["test-dort"])
-def test_dort(session):
-    """Test the conversion of a couple of models from transformers."""
-    session.install(
-        *COMMON_TEST_DEPENDENCIES,
-    )
-    torch_version, transformers_version = session.posargs
-
-    if torch_version == "nightly":
-        session.install(
-            "--pre",
-            "torch",
-            "torchvision",
-            "torchaudio",
-            "--index-url",
-            "https://download.pytorch.org/whl/nightly/cpu",
-        )
-    else:
-        session.install("torch", "torchvision", "torchaudio")
-
-    session.install("torch", "torchvision", "torchaudio")
-    session.install(f"transformers=={transformers_version}")
-    session.install("onnxruntime-training==1.17.1")
-
-    session.run("pip", "list")
-    session.run("pytest", "onnxscript")
-    session.run("pytest", "tests")
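For context, the pinned constants above are consumed by the nox sessions that CI selects by tag; the workflow step `nox -t ${{ matrix.nox-tag }} --forcecolor` runs every session carrying the matching tag. A minimal sketch of that pattern, with the session body assumed for illustration (only the constants, the tag mechanism, and the `session.install`/`session.run` calls appear in this PR):

```python
import nox

# Version pins as updated by this PR.
ONNX = "onnx==1.17"
ONNX_RUNTIME = "onnxruntime==1.20.1"
PYTORCH = "torch==2.4.1"
TORCHVISON = "torchvision==0.19.1"


@nox.session(tags=["test"])
def test(session: nox.Session) -> None:
    """Install the pinned stack, then run the test suite."""
    session.install(PYTORCH, TORCHVISON, ONNX, ONNX_RUNTIME)
    session.install(".", "--no-deps")
    session.run("pytest", "onnxscript", *session.posargs)
```

Locally, `nox -t test` reproduces what the tagged CI jobs run.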
6 changes: 4 additions & 2 deletions onnxscript/evaluator.py
@@ -387,8 +387,10 @@ def _numpy_to_onnxscript_value(
 ):
     """Converts an ORT encoding of an ONNX value into the encoding used by onnxscript."""
     if isinstance(v, np.ndarray):
-        return tensor.Tensor(v)
-    if np.issctype(type(v)):  # noqa: NPY201
+        # ORT may reuse buffers when the output numpy array is provided back as input.
+        # We need to make a copy to ensure that the tensor is not modified in-place.
+        return tensor.Tensor(v.copy())
+    if issubclass(type(v), np.generic):
         # Numpy scalar types that are not ndarray
         # https://numpy.org/doc/stable/reference/arrays.scalars.html
         return tensor.Tensor(np.array(v))
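The added comment is the substance of this fix: ONNX Runtime can return an output array that aliases an internal buffer, and when that array is later fed back as an input, the runtime may overwrite it. Holding the array without copying would let a previously returned `tensor.Tensor` change underneath the caller. A standalone numpy sketch of the aliasing hazard (the buffer reuse is simulated here; it does not call ORT):

```python
import numpy as np

# A runtime-owned buffer that may be recycled between calls.
buffer = np.array([1.0, 2.0, 3.0])

kept_reference = buffer    # what storing `v` directly amounted to
kept_copy = buffer.copy()  # what the fix does

# The runtime reuses the buffer for the next computation...
buffer[0] = 99.0

print(kept_reference[0])  # 99.0 -- the aliased "result" silently changed
print(kept_copy[0])       # 1.0  -- the copied result is stable
```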
14 changes: 4 additions & 10 deletions onnxscript/function_libs/torch_lib/ops/core.py
@@ -8272,20 +8272,14 @@ def aten_to_sparse_csr(self: TensorType) -> TensorType:
     raise NotImplementedError()
 
 
-@torch_op("aten::topk", traceable=True)
+@torch_op("aten::topk", trace_only=True)
 def aten_topk(
-    self: TReal, k: INT64, dim: int = -1, largest: bool = True, sorted: bool = True
+    self: TReal, k: int, dim: int = -1, largest: bool = True, sorted: bool = True
 ) -> Tuple[TReal, INT64]:
     """topk(Tensor self, int k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices)"""
 
-    self_is_scalar = IsScalar(self)
-    if self_is_scalar:
-        self = op.Unsqueeze(self, op.Constant(value_ints=[0]))
-    k = op.Reshape(op.Cast(k, to=INT64.dtype), op.Constant(value_ints=[1]))
-    values, indices = op.TopK(self, k, axis=dim, largest=largest, sorted=sorted)
-    if self_is_scalar:
-        values = op.Squeeze(values, op.Constant(value_ints=[0]))
-        indices = op.Squeeze(indices, op.Constant(value_ints=[0]))
+    # We do not handle scalar inputs for topk
+    values, indices = op.TopK(self, [k], axis=dim, largest=largest, sorted=sorted)
     return values, indices
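The rewrite passes `k` to ONNX `TopK` as the one-element list `[k]` because the operator's `K` input is a 1-D tensor rather than a scalar attribute; the old graph-mode code built that tensor at runtime with `Cast`/`Reshape`. As a rough numpy model of the values/indices contract `TopK` implements (illustrative only, not the runtime kernel):

```python
import numpy as np

def topk_reference(x: np.ndarray, k: int, axis: int = -1, largest: bool = True):
    """Return the top-k values and their int64 indices along `axis`, sorted."""
    order = np.argsort(x, axis=axis)  # indices in ascending value order
    if largest:
        order = np.flip(order, axis=axis)
    indices = np.take(order, np.arange(k), axis=axis).astype(np.int64)
    values = np.take_along_axis(x, indices, axis=axis)
    return values, indices

x = np.array([[1.0, 5.0, 3.0], [9.0, 2.0, 4.0]])
values, indices = topk_reference(x, k=2)
# values  -> [[5., 3.], [9., 4.]]
# indices -> [[1, 2], [0, 2]]
```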


35 changes: 15 additions & 20 deletions onnxscript/optimizer/_constant_folding.py
@@ -16,7 +16,6 @@
 import onnx.reference.ops
 
 import onnxscript.ir as ir
-import onnxscript.ir._convenience as _convenience
 import onnxscript.rewriter.pattern as orp
 import onnxscript.utils.utils as utils
 
@@ -242,10 +241,12 @@ def _get_numpy_value(val: ir.Value | None) -> np.ndarray | None:
     const_value = val.const_value
     if const_value is not None:
         try:
-            return const_value.numpy()
+            array = const_value.numpy()
         except FileNotFoundError:
             # External data is not available.
             return None
+        assert isinstance(array, np.ndarray)
+        return array
     return None


@@ -255,14 +256,7 @@ def _get_bool_value(val: ir.Value | None) -> bool | None:
     value = _get_numpy_value(val)
     if value is None:
         return None
-    # TODO: cleanup following checks, which seem redundant. But need to also ensure
-    # the invariant when setting the value (and also use clearly defined representation
-    # types in evaluators, such a reference-evaluator).
-    if isinstance(value, bool):
-        return value
-    if isinstance(value, np.bool_):
-        return bool(value)
-    if isinstance(value, np.ndarray) and value.size == 1 and value.dtype == bool:
+    if value.size == 1 and value.dtype == np.bool_:
         return value.item(0)
     return None
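The three removed `isinstance` branches collapse into a single check because `_get_numpy_value` now guarantees an `np.ndarray` (the `assert` added in the hunk above). A small numpy check of the invariant the simplified code relies on:

```python
import numpy as np

# Both 0-d and size-1 boolean arrays satisfy the one remaining check,
# and item(0) extracts a plain Python bool from either.
for value in (np.array(True), np.array([True])):
    assert value.size == 1 and value.dtype == np.bool_
    assert value.item(0) is True
```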

@@ -716,10 +710,6 @@ def get_type(value: ir.Value) -> onnx.TypeProto | None:
         )
 
     def new_constant(self, irvalue: ir.Value, value):
-        # TODO(rama): Why do we need the conversion below?
-        if isinstance(value, (int, float, np.ScalarType)):
-            value = np.array(value)
-
         if not isinstance(value, np.ndarray):
             # ONNX does not have a way to represent non-tensor constants, eg. a sequence.
             # So, a constant-value of type sequence is not folded, but it can be used
@@ -731,7 +721,9 @@ def new_constant(self, irvalue: ir.Value, value):
             )
             return None
 
-        irvalue.const_value = _convenience.tensor(value)
+        tensor = ir.tensor(value)
+        tensor.name = irvalue.name
+        irvalue.const_value = tensor
 
         if value.nbytes > self._output_size_limit:
             logger.info(
@@ -741,17 +733,20 @@ def new_constant(self, irvalue: ir.Value, value):
             )
             return None
 
-        tensor = onnx.numpy_helper.from_array(value, irvalue.name)
-
         logger.debug(
             "New constant for value %s dtype: %s shape: %s",
             irvalue.name,
             value.dtype,
             value.shape,
         )
 
-        attributes = _convenience.convert_attributes({"value": tensor})
-        node = ir.Node("", "Constant", inputs=[], attributes=attributes, num_outputs=1)
+        node = ir.Node(
+            "",
+            "Constant",
+            inputs=[],
+            attributes=ir.convenience.convert_attributes({"value": tensor}),
+            num_outputs=1,
+        )
         return node
 
     def process_node(self, node: ir.Node):
@@ -837,7 +832,7 @@ def convert(av):
     def replace_node(self, node: ir.Node, replacement, root: ir.Graph | ir.Function):
         logger.debug("Replacing node: %s::%s %s", node.domain, node.op_type, node.name)
 
-        _convenience.replace_nodes_and_values(
+        ir.convenience.replace_nodes_and_values(
             root, node, [node], replacement.new_nodes, node.outputs, replacement.new_outputs
         )

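The net effect of these hunks is that constant folding now goes through the public `ir` API (`ir.tensor`, `ir.convenience`) instead of the private `_convenience` module and `onnx.numpy_helper`. Pulled out of `new_constant` into a standalone sketch (the array and names here are illustrative):

```python
import numpy as np
import onnxscript.ir as ir

# Wrap a numpy array as an IR tensor and attach it as the `value`
# attribute of a Constant node, mirroring new_constant above.
array = np.array([1.0, 2.0, 3.0], dtype=np.float32)
tensor = ir.tensor(array)
tensor.name = "folded_value"  # named after the IR value it replaces

node = ir.Node(
    "",  # default ONNX domain
    "Constant",
    inputs=[],
    attributes=ir.convenience.convert_attributes({"value": tensor}),
    num_outputs=1,
)
```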
2 changes: 1 addition & 1 deletion onnxscript/rewriter/broadcast_to_matmul.py
@@ -55,7 +55,7 @@ def check_if_not_need_reshape(
         return False
     input_a_shape = input_a_shape.numpy()  # type: ignore[assignment]
     input_b_shape = input_b_shape.numpy()  # type: ignore[assignment]
-    shape_c = shape_c_tensor.numpy().tolist()
+    shape_c = shape_c_tensor.numpy().tolist()  # type: ignore[assignment]
 
     a_rank = len(input_a_shape)
     b_rank = len(input_b_shape)
28 changes: 0 additions & 28 deletions onnxscript/tools/transformers_models/llama_test.py
@@ -2,7 +2,6 @@
 # Licensed under the MIT License.
 # pylint: disable=not-callable
 
-import copy
 import sys
 import unittest
 
@@ -111,33 +110,6 @@ def test_llama_export_cuda(self):
         results = sess.run(None, feeds)
         np.testing.assert_allclose(expected[0].detach().cpu().numpy(), results[0], atol=1e-5)
 
-    @unittest.skipIf(sys.platform == "win32", reason="not supported yet on Windows")
-    @unittest.skipIf(not has_transformers(), reason="transformers is missing")
-    @unittest.skipIf(torch_older_than("2.4"), reason="fails to export")
-    @ignore_warnings(UserWarning)
-    def test_llama_dort_static(self):
-        model, input_tensors_many, _ = (
-            onnxscript.tools.transformers_models.llama.get_llama_model()
-        )
-        input_tensors = input_tensors_many[0]
-        expected = model(*input_tensors)
-
-        local_aot_ort = onnxscript.tools.training_helper.make_aot_ort(dynamic=False)
-
-        compiled_model = torch.compile(
-            copy.deepcopy(model),
-            backend=local_aot_ort,
-            dynamic=False,
-            fullgraph=True,
-        )
-
-        results = compiled_model(*input_tensors)
-        torch.testing.assert_close(expected[0], results[0], atol=1e-5, rtol=1e-5)
-
-        expected_gradients = onnxscript.tools.training_helper.train_loop(model, *input_tensors)
-        gradients = onnxscript.tools.training_helper.train_loop(compiled_model, *input_tensors)
-        torch.testing.assert_close(expected_gradients[0], gradients[0], atol=1.0e-5, rtol=1e-5)
-
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
29 changes: 0 additions & 29 deletions onnxscript/tools/transformers_models/mistral_test.py
@@ -2,7 +2,6 @@
 # Licensed under the MIT License.
 # pylint: disable=not-callable
 
-import copy
 import sys
 import unittest
 
@@ -18,7 +17,6 @@
 from onnxscript._internal.version_utils import (
     has_transformers,
     ignore_warnings,
-    onnxruntime_older_than,
     torch_older_than,
     transformers_older_than,
 )
@@ -113,33 +111,6 @@ def test_phi_export_cuda(self):
         results = sess.run(None, feeds)
         np.testing.assert_allclose(expected[0].detach().cpu().numpy(), results[0], atol=1e-5)
 
-    @unittest.skipIf(sys.platform == "win32", reason="not supported yet on Windows")
-    @unittest.skipIf(not has_transformers(), reason="transformers is missing")
-    @unittest.skipIf(onnxruntime_older_than("1.18.0"), reason="Trilu not imeplemnted")
-    @ignore_warnings(UserWarning)
-    def test_mistral_dort_static(self):
-        model, input_tensors_many, _ = (
-            onnxscript.tools.transformers_models.mistral.get_mistral_model()
-        )
-        input_tensors = input_tensors_many[0]
-        expected = model(*input_tensors)
-
-        local_aot_ort = onnxscript.tools.training_helper.make_aot_ort(dynamic=False)
-
-        compiled_model = torch.compile(
-            copy.deepcopy(model),
-            backend=local_aot_ort,
-            dynamic=False,
-            fullgraph=True,
-        )
-
-        results = compiled_model(*input_tensors)
-        torch.testing.assert_close(expected[0], results[0], atol=1e-5, rtol=1e-5)
-
-        expected_gradients = onnxscript.tools.training_helper.train_loop(model, *input_tensors)
-        gradients = onnxscript.tools.training_helper.train_loop(compiled_model, *input_tensors)
-        torch.testing.assert_close(expected_gradients[0], gradients[0], atol=1e-5, rtol=1e-5)
-
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)