Update on "[executorch][serialization] Serialize PTD files."
Introduce a top-level serialization file that calls:
- serialize_pte_binary for the PTE file
- FlatTensor.serialize_tensors for PTD files.

Differential Revision: [D66523267](https://our.internmc.facebook.com/intern/diff/D66523267/)

[ghstack-poisoned]
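
For orientation, a rough sketch of what a top-level entry point like this could look like. Only serialize_pte_binary and FlatTensor.serialize_tensors are named in the commit message; the function name, signature, and one-PTD-file-per-group layout below are illustrative assumptions, so the two serializers are passed in as callables to keep the sketch self-contained:

```python
# Hypothetical sketch, not the code in this commit.
from pathlib import Path
from typing import Callable, Mapping

def serialize_artifacts(
    program: object,
    tensor_groups: Mapping[str, object],
    out_dir: str,
    model_name: str,
    serialize_pte_binary: Callable[[object], bytes],  # assumed signature
    serialize_tensors: Callable[[object], bytes],     # assumed signature
) -> None:
    out = Path(out_dir)
    # One .pte file holds the executable program.
    (out / f"{model_name}.pte").write_bytes(serialize_pte_binary(program))
    # One .ptd file per group of externally stored tensor data.
    for group_name, tensors in tensor_groups.items():
        (out / f"{group_name}.ptd").write_bytes(serialize_tensors(tensors))
```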
lucylq committed Dec 10, 2024
2 parents aa13c87 + 062d47b commit 103ac70
Showing 73 changed files with 8,816 additions and 14,111 deletions.
.ci/docker/build.sh (5 additions, 0 deletions)

@@ -41,6 +41,10 @@ case "${IMAGE_NAME}" in
     QNN_SDK=yes
     CLANG_VERSION=12
     ;;
+  executorch-ubuntu-22.04-mediatek-sdk)
+    MEDIATEK_SDK=yes
+    CLANG_VERSION=12
+    ;;
   executorch-ubuntu-22.04-clang12-android)
     LINTRUNNER=""
     CLANG_VERSION=12
@@ -77,6 +81,7 @@ docker build \
   --build-arg "BUILD_DOCS=${BUILD_DOCS}" \
   --build-arg "ARM_SDK=${ARM_SDK:-}" \
   --build-arg "QNN_SDK=${QNN_SDK:-}" \
+  --build-arg "MEDIATEK_SDK=${MEDIATEK_SDK:-}" \
  --build-arg "ANDROID_NDK_VERSION=${ANDROID_NDK_VERSION:-}" \
  -f "${OS}"/Dockerfile \
  "$@" \
.ci/docker/ubuntu/Dockerfile (2 additions, 0 deletions)

@@ -85,5 +85,7 @@ RUN if [ -n "${ARM_SDK}" ]; then git config --global user.email "[email protected]"

 ARG QNN_SDK

+ARG MEDIATEK_SDK
+
 USER ci-user
 CMD ["bash"]
.ci/scripts/test_llama.sh (9 additions, 0 deletions)

@@ -110,6 +110,12 @@ else
   COREML=OFF
 fi

+if [[ "${MODE}" =~ .*quantize_kv.* ]]; then
+  QUANTIZE_KV_CACHE=ON
+else
+  QUANTIZE_KV_CACHE=OFF
+fi
+
 echo "COREML option ${COREML}"

 if [[ "${MODE}" =~ .*qnn.* ]]; then
@@ -249,6 +255,9 @@ if [[ "${QNN}" == "ON" ]]; then
     EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
   fi
 fi
+if [[ "${QUANTIZE_KV_CACHE}" == "ON" ]]; then
+  EXPORT_ARGS="${EXPORT_ARGS} --quantize_kv_cache"
+fi
 # Add dynamically linked library location
 $PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
.github/workflows/docker-builds.yml (1 addition, 0 deletions)

@@ -42,6 +42,7 @@ jobs:
           - docker-image-name: executorch-ubuntu-22.04-linter
           - docker-image-name: executorch-ubuntu-22.04-arm-sdk
           - docker-image-name: executorch-ubuntu-22.04-qnn-sdk
+          - docker-image-name: executorch-ubuntu-22.04-mediatek-sdk
           - docker-image-name: executorch-ubuntu-22.04-clang12-android
     env:
       DOCKER_IMAGE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/executorch/${{ matrix.docker-image-name }}
.github/workflows/pull.yml (19 additions, 1 deletion)

@@ -86,7 +86,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        mode: [portable, xnnpack+custom, xnnpack+custom+qe]
+        mode: [portable, xnnpack+custom, xnnpack+custom+qe,xnnpack+custom+quantize_kv,xnnpack+quantize_kv]
         include:
           - dtype: bf16
             mode: portable
@@ -504,3 +504,21 @@ jobs:
       # run llama runner in eager mode
       PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh
+
+  test-mediatek-models-linux:
+    name: test-mediatek-models-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: executorch-ubuntu-22.04-mediatek-sdk
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # placeholder for mediatek to add more tests
.github/workflows/trunk.yml (1 addition, 1 deletion)

@@ -225,7 +225,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        mode: [portable, xnnpack+kv+custom, mps, coreml]
+        mode: [portable, xnnpack+kv+custom, mps, coreml, xnnpack+custom+quantize_kv]
         include:
           - dtype: bf16
             mode: portable
CMakeLists.txt (3 additions, 3 deletions)

@@ -742,9 +742,9 @@ if(EXECUTORCH_BUILD_PYBIND)
   endif()

   if(EXECUTORCH_BUILD_XNNPACK)
-    # need to explicitly specify XNNPACK here otherwise uses XNNPACK symbols
-    # from libtorch_cpu
-    list(APPEND _dep_libs xnnpack_backend XNNPACK)
+    # need to explicitly specify XNNPACK and microkernels-prod
+    # here otherwise uses XNNPACK and microkernel-prod symbols from libtorch_cpu
+    list(APPEND _dep_libs xnnpack_backend XNNPACK microkernels-prod)
   endif()

   # compile options for pybind
backends/arm/arm_backend.py (3 additions, 6 deletions)

@@ -13,7 +13,7 @@

 import logging
 import os
-from typing import final, List, Optional
+from typing import cast, final, List, Optional

 import serializer.tosa_serializer as ts
 from executorch.backends.arm.arm_vela import vela_compile
@@ -32,6 +32,7 @@
 from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from torch.export.exported_program import ExportedProgram
+from torch.fx import Node

 # TOSA backend debug functionality
 logger = logging.getLogger(__name__)
@@ -269,6 +270,7 @@ def preprocess(  # noqa: C901
         node_visitors = get_node_visitors(edge_program, tosa_spec)
         input_count = 0
         for node in graph_module.graph.nodes:
+            node = cast(Node, node)
             if node.op == "call_function":
                 process_call_function(node, tosa_graph, node_visitors, tosa_spec)
             elif node.op == "placeholder":
@@ -288,9 +290,6 @@ def preprocess(  # noqa: C901
                     "The rank of the input order is not equal to amount of input tensors"
                 )

-        # TODO: It would be awesome if this dump could somehow be done on top level and not here.
-        # Problem is that the desc.json has to be created on the tosa_graph object, which we can't
-        # access from top level.
         if artifact_path:
             tag = _get_first_delegation_tag(graph_module)
             dbg_tosa_dump(
@@ -311,6 +310,4 @@ def preprocess(  # noqa: C901
     else:
         raise RuntimeError(f"Unknown format {output_format}")

-    # Continueing from above. Can I put tosa_graph into this function?
-    # debug_handle_map = ...
     return PreprocessResult(processed_bytes=binary)
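
The added cast(Node, node) is purely a static-typing hint: typing.cast returns its argument unchanged at runtime and only tells the checker that items yielded by graph_module.graph.nodes are fx Nodes. A self-contained illustration (the Node class below is a stand-in for torch.fx.Node so the example runs anywhere):

```python
from typing import cast

class Node:  # stand-in for torch.fx.Node, to keep the example self-contained
    op = "call_function"

untyped: object = Node()
node = cast(Node, untyped)  # no runtime effect: node is untyped, unchanged
assert node is untyped
print(node.op)  # the type checker now knows .op exists here
```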
backends/arm/test/common.py (4 additions, 8 deletions)

@@ -74,19 +74,15 @@ def get_tosa_compile_spec_unbuilt(
     the compile spec before calling .build() to finalize it.
     """
     if not custom_path:
-        intermediate_path = maybe_get_tosa_collate_path() or tempfile.mkdtemp(
-            prefix="arm_tosa_"
-        )
-    else:
-        intermediate_path = custom_path
+        custom_path = maybe_get_tosa_collate_path()

-    if not os.path.exists(intermediate_path):
-        os.makedirs(intermediate_path, exist_ok=True)
+    if custom_path is not None:
+        os.makedirs(custom_path, exist_ok=True)
     compile_spec_builder = (
         ArmCompileSpecBuilder()
         .tosa_compile_spec(tosa_version)
         .set_permute_memory_format(permute_memory_to_nhwc)
-        .dump_intermediate_artifacts_to(intermediate_path)
+        .dump_intermediate_artifacts_to(custom_path)
     )

     return compile_spec_builder
backends/arm/test/conftest.py (11 additions, 22 deletions)

@@ -11,7 +11,6 @@
 import shutil
 import subprocess
 import sys
-from enum import auto, Enum
 from typing import Any

 import pytest
@@ -22,30 +21,24 @@
 """


-class arm_test_options(Enum):
-    quantize_io = auto()
-    corstone_fvp = auto()
-    fast_fvp = auto()
-
-
-_test_options: dict[arm_test_options, Any] = {}
-
 # ==== Pytest hooks ====


 def pytest_configure(config):
+    pytest._test_options = {}
+
     if config.option.arm_quantize_io:
         _load_libquantized_ops_aot_lib()
-        _test_options[arm_test_options.quantize_io] = True
+        pytest._test_options["quantize_io"] = True
     if config.option.arm_run_corstoneFVP:
         corstone300_exists = shutil.which("FVP_Corstone_SSE-300_Ethos-U55")
         corstone320_exists = shutil.which("FVP_Corstone_SSE-320")
         if not (corstone300_exists and corstone320_exists):
             raise RuntimeError(
                 "Tests are run with --arm_run_corstoneFVP but corstone FVP is not installed."
             )
-        _test_options[arm_test_options.corstone_fvp] = True
-        _test_options[arm_test_options.fast_fvp] = config.option.fast_fvp
+        pytest._test_options["corstone_fvp"] = True
+        pytest._test_options["fast_fvp"] = config.option.fast_fvp
     logging.basicConfig(level=logging.INFO, stream=sys.stdout)


@@ -131,9 +124,7 @@ def expectedFailureOnFVP(test_item):
 # ==== End of Custom Pytest decorators =====


-def is_option_enabled(
-    option: str | arm_test_options, fail_if_not_enabled: bool = False
-) -> bool:
+def is_option_enabled(option: str, fail_if_not_enabled: bool = False) -> bool:
     """
     Returns whether an option is successfully enabled, i.e. if the flag was
     given to pytest and the necessary requirements are available.
@@ -144,10 +135,8 @@ def is_option_enabled(
     The optional parameter 'fail_if_not_enabled' makes the function raise
     a RuntimeError instead of returning False.
     """
-    if isinstance(option, str):
-        option = arm_test_options[option.lower()]
-
-    if option in _test_options and _test_options[option]:
+    if option in pytest._test_options and pytest._test_options[option]:
         return True
     else:
         if fail_if_not_enabled:
@@ -156,15 +145,15 @@ def is_option_enabled(
         return False


-def get_option(option: arm_test_options) -> Any | None:
+def get_option(option: str) -> Any | None:
     """
     Returns the value of an pytest option if it is set, otherwise None.
     Args:
-        option (arm_test_options): The option to check for.
+        option (str): The option to check for.
     """
-    if option in _test_options:
-        return _test_options[option]
+    if option in pytest._test_options:
+        return pytest._test_options[option]
     return None
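
The net effect of the conftest.py rework: the Enum-keyed, module-level _test_options dict becomes a plain string-keyed dict stored on the pytest module object itself, presumably so callers no longer need to import this conftest (or its Enum) to query options. A condensed, runnable sketch of the new flow, with a namespace object standing in for the real pytest module and an illustrative error message:

```python
import types

pytest = types.SimpleNamespace()  # stand-in for the real pytest module

# pytest_configure(config) now seeds a plain dict directly on the module:
pytest._test_options = {"quantize_io": True, "fast_fvp": False}

def is_option_enabled(option: str, fail_if_not_enabled: bool = False) -> bool:
    if option in pytest._test_options and pytest._test_options[option]:
        return True
    if fail_if_not_enabled:
        raise RuntimeError(f"Option '{option}' is required but not enabled")
    return False

def get_option(option: str):
    return pytest._test_options.get(option)  # None when never configured

print(is_option_enabled("quantize_io"))  # True
print(is_option_enabled("fast_fvp"))     # False: set, but falsy
print(get_option("corstone_fvp"))        # None
```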
backends/arm/test/misc/test_debug_feats.py (3 additions, 1 deletion)

@@ -111,7 +111,9 @@ def test_numerical_diff_prints(self):
             model,
             example_inputs=model.get_inputs(),
             compile_spec=common.get_tosa_compile_spec(
-                "TOSA-0.80.0+MI", permute_memory_to_nhwc=True
+                "TOSA-0.80.0+MI",
+                permute_memory_to_nhwc=True,
+                custom_path=tempfile.mkdtemp("diff_print_test"),
             ),
         )
         .export()
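
One detail worth flagging in the new argument: tempfile.mkdtemp's first positional parameter is suffix, so tempfile.mkdtemp("diff_print_test") creates a directory whose randomized name ends with that string rather than starting with it. Harmless here, but easy to misread:

```python
import tempfile

print(tempfile.mkdtemp("diff_print_test"))          # e.g. /tmp/tmpk3x9diff_print_test
print(tempfile.mkdtemp(prefix="diff_print_test_"))  # e.g. /tmp/diff_print_test_k3x9
```

Passing an explicit custom_path also lines up with the common.py change above, which removed the tempfile fallback from the unbuilt compile-spec helper; presumably tests that want dump output must now supply their own directory.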
backends/arm/test/ops/test_cat.py (1 addition, 1 deletion)

@@ -124,7 +124,7 @@ def test_cat_tosa_MI(self, operands: tuple[torch.Tensor, ...], dim: int):
     def test_cat_4d_tosa_MI(self):
         square = torch.ones((2, 2, 2, 2))
         for dim in range(-3, 3):
-            test_data = ((square, square), dim)
+            test_data = ((square, square.clone()), dim)
             self._test_cat_tosa_MI_pipeline(self.Cat(), test_data)

     @parameterized.expand(Cat.test_parameters)
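
The .clone() looks cosmetic but changes aliasing: without it, both concat operands are the same tensor object sharing one storage, whereas a clone is an independent copy, so the pipeline gets two distinct input buffers (which is presumably the intent here). A small demonstration:

```python
import torch

square = torch.ones((2, 2, 2, 2))
aliased = (square, square)              # two references, one storage
independent = (square, square.clone())  # second operand owns its storage

square.add_(1)  # in-place update through the original tensor
print(aliased[1].flatten()[0].item())      # 2.0: the alias sees the change
print(independent[1].flatten()[0].item())  # 1.0: the clone is unaffected
```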
backends/arm/test/ops/test_scalars.py (1 addition, 1 deletion)

@@ -157,7 +157,7 @@ def _test_add_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: tuple):
     def test_MI(self, test_name: str, op: torch.nn.Module, x, y):
         expected_exception = None
         if any(token in test_name for token in ("Sub_int", "Sub__int")):
-            expected_exception = RuntimeError
+            expected_exception = ValueError
         elif test_name.endswith("_st"):
             expected_exception = AttributeError
backends/arm/test/ops/test_select.py (0 additions, 2 deletions)

@@ -93,8 +93,6 @@ def _test_select_tosa_BI_pipeline(
             .check(["torch.ops.quantized_decomposed"])
             .to_edge()
             .partition()
-            .dump_artifact()
-            .dump_operator_distribution()
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
             .run_method_and_compare_outputs(inputs=test_data)
(Diff truncated; the remaining changed files are not shown.)